LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsvector_op.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 877 1023 85.7 %
Date: 2019-09-19 02:07:14 Functions: 40 49 81.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * tsvector_op.c
       4             :  *    operations over tsvector
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/tsvector_op.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include <limits.h>
      17             : 
      18             : #include "access/htup_details.h"
      19             : #include "catalog/namespace.h"
      20             : #include "catalog/pg_type.h"
      21             : #include "commands/trigger.h"
      22             : #include "executor/spi.h"
      23             : #include "funcapi.h"
      24             : #include "mb/pg_wchar.h"
      25             : #include "miscadmin.h"
      26             : #include "parser/parse_coerce.h"
      27             : #include "tsearch/ts_utils.h"
      28             : #include "utils/array.h"
      29             : #include "utils/builtins.h"
      30             : #include "utils/lsyscache.h"
      31             : #include "utils/regproc.h"
      32             : #include "utils/rel.h"
      33             : 
      34             : 
      35             : typedef struct
      36             : {
      37             :     WordEntry  *arrb;
      38             :     WordEntry  *arre;
      39             :     char       *values;
      40             :     char       *operand;
      41             : } CHKVAL;
      42             : 
      43             : 
      44             : typedef struct StatEntry
      45             : {
      46             :     uint32      ndoc;           /* zero indicates that we were already here
      47             :                                  * while walking through the tree */
      48             :     uint32      nentry;
      49             :     struct StatEntry *left;
      50             :     struct StatEntry *right;
      51             :     uint32      lenlexeme;
      52             :     char        lexeme[FLEXIBLE_ARRAY_MEMBER];
      53             : } StatEntry;
      54             : 
      55             : #define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))
      56             : 
      57             : typedef struct
      58             : {
      59             :     int32       weight;
      60             : 
      61             :     uint32      maxdepth;
      62             : 
      63             :     StatEntry **stack;
      64             :     uint32      stackpos;
      65             : 
      66             :     StatEntry  *root;
      67             : } TSVectorStat;
      68             : 
      69             : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
      70             : static int  tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
      71             : 
      72             : /*
      73             :  * Order: haspos, len, word, for all positions (pos, weight)
      74             :  */
      75             : static int
      76           2 : silly_cmp_tsvector(const TSVector a, const TSVector b)
      77             : {
      78           2 :     if (VARSIZE(a) < VARSIZE(b))
      79           0 :         return -1;
      80           2 :     else if (VARSIZE(a) > VARSIZE(b))
      81           0 :         return 1;
      82           2 :     else if (a->size < b->size)
      83           0 :         return -1;
      84           2 :     else if (a->size > b->size)
      85           0 :         return 1;
      86             :     else
      87             :     {
      88           2 :         WordEntry  *aptr = ARRPTR(a);
      89           2 :         WordEntry  *bptr = ARRPTR(b);
      90           2 :         int         i = 0;
      91             :         int         res;
      92             : 
      93             : 
      94           8 :         for (i = 0; i < a->size; i++)
      95             :         {
      96           6 :             if (aptr->haspos != bptr->haspos)
      97             :             {
      98           0 :                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
      99             :             }
     100           6 :             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
     101             :             {
     102           0 :                 return res;
     103             :             }
     104           6 :             else if (aptr->haspos)
     105             :             {
     106           0 :                 WordEntryPos *ap = POSDATAPTR(a, aptr);
     107           0 :                 WordEntryPos *bp = POSDATAPTR(b, bptr);
     108             :                 int         j;
     109             : 
     110           0 :                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
     111           0 :                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
     112             : 
     113           0 :                 for (j = 0; j < POSDATALEN(a, aptr); j++)
     114             :                 {
     115           0 :                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
     116             :                     {
     117           0 :                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
     118             :                     }
     119           0 :                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
     120             :                     {
     121           0 :                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
     122             :                     }
     123           0 :                     ap++, bp++;
     124             :                 }
     125             :             }
     126             : 
     127           6 :             aptr++;
     128           6 :             bptr++;
     129             :         }
     130             :     }
     131             : 
     132           2 :     return 0;
     133             : }
     134             : 
     135             : #define TSVECTORCMPFUNC( type, action, ret )            \
     136             : Datum                                                   \
     137             : tsvector_##type(PG_FUNCTION_ARGS)                       \
     138             : {                                                       \
     139             :     TSVector    a = PG_GETARG_TSVECTOR(0);              \
     140             :     TSVector    b = PG_GETARG_TSVECTOR(1);              \
     141             :     int         res = silly_cmp_tsvector(a, b);         \
     142             :     PG_FREE_IF_COPY(a,0);                               \
     143             :     PG_FREE_IF_COPY(b,1);                               \
     144             :     PG_RETURN_##ret( res action 0 );                    \
     145             : }   \
     146             : /* keep compiler quiet - no extra ; */                  \
     147             : extern int no_such_variable
     148             : 
     149           0 : TSVECTORCMPFUNC(lt, <, BOOL);
     150           0 : TSVECTORCMPFUNC(le, <=, BOOL);
     151           2 : TSVECTORCMPFUNC(eq, ==, BOOL);
     152           0 : TSVECTORCMPFUNC(ge, >=, BOOL);
     153           0 : TSVECTORCMPFUNC(gt, >, BOOL);
     154           0 : TSVECTORCMPFUNC(ne, !=, BOOL);
     155           0 : TSVECTORCMPFUNC(cmp, +, INT32);
     156             : 
     157             : Datum
     158          24 : tsvector_strip(PG_FUNCTION_ARGS)
     159             : {
     160          24 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     161             :     TSVector    out;
     162             :     int         i,
     163          24 :                 len = 0;
     164          24 :     WordEntry  *arrin = ARRPTR(in),
     165             :                *arrout;
     166             :     char       *cur;
     167             : 
     168         100 :     for (i = 0; i < in->size; i++)
     169          76 :         len += arrin[i].len;
     170             : 
     171          24 :     len = CALCDATASIZE(in->size, len);
     172          24 :     out = (TSVector) palloc0(len);
     173          24 :     SET_VARSIZE(out, len);
     174          24 :     out->size = in->size;
     175          24 :     arrout = ARRPTR(out);
     176          24 :     cur = STRPTR(out);
     177         100 :     for (i = 0; i < in->size; i++)
     178             :     {
     179          76 :         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
     180          76 :         arrout[i].haspos = 0;
     181          76 :         arrout[i].len = arrin[i].len;
     182          76 :         arrout[i].pos = cur - STRPTR(out);
     183          76 :         cur += arrout[i].len;
     184             :     }
     185             : 
     186          24 :     PG_FREE_IF_COPY(in, 0);
     187          24 :     PG_RETURN_POINTER(out);
     188             : }
     189             : 
     190             : Datum
     191           4 : tsvector_length(PG_FUNCTION_ARGS)
     192             : {
     193           4 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     194           4 :     int32       ret = in->size;
     195             : 
     196           4 :     PG_FREE_IF_COPY(in, 0);
     197           4 :     PG_RETURN_INT32(ret);
     198             : }
     199             : 
     200             : Datum
     201           8 : tsvector_setweight(PG_FUNCTION_ARGS)
     202             : {
     203           8 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     204           8 :     char        cw = PG_GETARG_CHAR(1);
     205             :     TSVector    out;
     206             :     int         i,
     207             :                 j;
     208             :     WordEntry  *entry;
     209             :     WordEntryPos *p;
     210           8 :     int         w = 0;
     211             : 
     212           8 :     switch (cw)
     213             :     {
     214             :         case 'A':
     215             :         case 'a':
     216           0 :             w = 3;
     217           0 :             break;
     218             :         case 'B':
     219             :         case 'b':
     220           0 :             w = 2;
     221           0 :             break;
     222             :         case 'C':
     223             :         case 'c':
     224           8 :             w = 1;
     225           8 :             break;
     226             :         case 'D':
     227             :         case 'd':
     228           0 :             w = 0;
     229           0 :             break;
     230             :         default:
     231             :             /* internal error */
     232           0 :             elog(ERROR, "unrecognized weight: %d", cw);
     233             :     }
     234             : 
     235           8 :     out = (TSVector) palloc(VARSIZE(in));
     236           8 :     memcpy(out, in, VARSIZE(in));
     237           8 :     entry = ARRPTR(out);
     238           8 :     i = out->size;
     239          48 :     while (i--)
     240             :     {
     241          32 :         if ((j = POSDATALEN(out, entry)) != 0)
     242             :         {
     243          32 :             p = POSDATAPTR(out, entry);
     244         144 :             while (j--)
     245             :             {
     246          80 :                 WEP_SETWEIGHT(*p, w);
     247          80 :                 p++;
     248             :             }
     249             :         }
     250          32 :         entry++;
     251             :     }
     252             : 
     253           8 :     PG_FREE_IF_COPY(in, 0);
     254           8 :     PG_RETURN_POINTER(out);
     255             : }
     256             : 
     257             : /*
     258             :  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
     259             :  *
     260             :  * Assign weight w to elements of tsin that are listed in lexemes.
     261             :  */
     262             : Datum
     263          20 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
     264             : {
     265          20 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     266          20 :     char        char_weight = PG_GETARG_CHAR(1);
     267          20 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
     268             : 
     269             :     TSVector    tsout;
     270             :     int         i,
     271             :                 j,
     272             :                 nlexemes,
     273             :                 weight;
     274             :     WordEntry  *entry;
     275             :     Datum      *dlexemes;
     276             :     bool       *nulls;
     277             : 
     278          20 :     switch (char_weight)
     279             :     {
     280             :         case 'A':
     281             :         case 'a':
     282           0 :             weight = 3;
     283           0 :             break;
     284             :         case 'B':
     285             :         case 'b':
     286           0 :             weight = 2;
     287           0 :             break;
     288             :         case 'C':
     289             :         case 'c':
     290          20 :             weight = 1;
     291          20 :             break;
     292             :         case 'D':
     293             :         case 'd':
     294           0 :             weight = 0;
     295           0 :             break;
     296             :         default:
     297             :             /* internal error */
     298           0 :             elog(ERROR, "unrecognized weight: %c", char_weight);
     299             :     }
     300             : 
     301          20 :     tsout = (TSVector) palloc(VARSIZE(tsin));
     302          20 :     memcpy(tsout, tsin, VARSIZE(tsin));
     303          20 :     entry = ARRPTR(tsout);
     304             : 
     305          20 :     deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
     306             :                       &dlexemes, &nulls, &nlexemes);
     307             : 
     308             :     /*
     309             :      * Assuming that lexemes array is significantly shorter than tsvector we
     310             :      * can iterate through lexemes performing binary search of each lexeme
     311             :      * from lexemes in tsvector.
     312             :      */
     313          52 :     for (i = 0; i < nlexemes; i++)
     314             :     {
     315             :         char       *lex;
     316             :         int         lex_len,
     317             :                     lex_pos;
     318             : 
     319          36 :         if (nulls[i])
     320           4 :             ereport(ERROR,
     321             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     322             :                      errmsg("lexeme array may not contain nulls")));
     323             : 
     324          32 :         lex = VARDATA(dlexemes[i]);
     325          32 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     326          32 :         lex_pos = tsvector_bsearch(tsout, lex, lex_len);
     327             : 
     328          32 :         if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
     329             :         {
     330          16 :             WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
     331             : 
     332          68 :             while (j--)
     333             :             {
     334          36 :                 WEP_SETWEIGHT(*p, weight);
     335          36 :                 p++;
     336             :             }
     337             :         }
     338             :     }
     339             : 
     340          16 :     PG_FREE_IF_COPY(tsin, 0);
     341          16 :     PG_FREE_IF_COPY(lexemes, 2);
     342             : 
     343          16 :     PG_RETURN_POINTER(tsout);
     344             : }
     345             : 
     346             : #define compareEntry(pa, a, pb, b) \
     347             :     tsCompareString((pa) + (a)->pos, (a)->len,    \
     348             :                     (pb) + (b)->pos, (b)->len,    \
     349             :                     false)
     350             : 
     351             : /*
     352             :  * Add positions from src to dest after offsetting them by maxpos.
     353             :  * Return the number added (might be less than expected due to overflow)
     354             :  */
     355             : static int32
     356           8 : add_pos(TSVector src, WordEntry *srcptr,
     357             :         TSVector dest, WordEntry *destptr,
     358             :         int32 maxpos)
     359             : {
     360           8 :     uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
     361             :     int         i;
     362           8 :     uint16      slen = POSDATALEN(src, srcptr),
     363             :                 startlen;
     364           8 :     WordEntryPos *spos = POSDATAPTR(src, srcptr),
     365           8 :                *dpos = POSDATAPTR(dest, destptr);
     366             : 
     367           8 :     if (!destptr->haspos)
     368           0 :         *clen = 0;
     369             : 
     370           8 :     startlen = *clen;
     371          24 :     for (i = 0;
     372          32 :          i < slen && *clen < MAXNUMPOS &&
     373          12 :          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
     374           8 :          i++)
     375             :     {
     376           8 :         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
     377           8 :         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
     378           8 :         (*clen)++;
     379             :     }
     380             : 
     381           8 :     if (*clen != startlen)
     382           8 :         destptr->haspos = 1;
     383           8 :     return *clen - startlen;
     384             : }
     385             : 
     386             : /*
     387             :  * Perform binary search of given lexeme in TSVector.
     388             :  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
     389             :  * found.
     390             :  */
     391             : static int
     392         132 : tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
     393             : {
     394         132 :     WordEntry  *arrin = ARRPTR(tsv);
     395         132 :     int         StopLow = 0,
     396         132 :                 StopHigh = tsv->size,
     397             :                 StopMiddle,
     398             :                 cmp;
     399             : 
     400         468 :     while (StopLow < StopHigh)
     401             :     {
     402         304 :         StopMiddle = (StopLow + StopHigh) / 2;
     403             : 
     404         608 :         cmp = tsCompareString(lexeme, lexeme_len,
     405         304 :                               STRPTR(tsv) + arrin[StopMiddle].pos,
     406         304 :                               arrin[StopMiddle].len,
     407             :                               false);
     408             : 
     409         304 :         if (cmp < 0)
     410         128 :             StopHigh = StopMiddle;
     411         176 :         else if (cmp > 0)
     412          76 :             StopLow = StopMiddle + 1;
     413             :         else                    /* found it */
     414         100 :             return StopMiddle;
     415             :     }
     416             : 
     417          32 :     return -1;
     418             : }
     419             : 
     420             : /*
     421             :  * qsort comparator functions
     422             :  */
     423             : 
     424             : static int
     425          24 : compare_int(const void *va, const void *vb)
     426             : {
     427          24 :     int         a = *((const int *) va);
     428          24 :     int         b = *((const int *) vb);
     429             : 
     430          24 :     if (a == b)
     431           4 :         return 0;
     432          20 :     return (a > b) ? 1 : -1;
     433             : }
     434             : 
     435             : static int
     436          68 : compare_text_lexemes(const void *va, const void *vb)
     437             : {
     438          68 :     Datum       a = *((const Datum *) va);
     439          68 :     Datum       b = *((const Datum *) vb);
     440          68 :     char       *alex = VARDATA_ANY(a);
     441          68 :     int         alex_len = VARSIZE_ANY_EXHDR(a);
     442          68 :     char       *blex = VARDATA_ANY(b);
     443          68 :     int         blex_len = VARSIZE_ANY_EXHDR(b);
     444             : 
     445          68 :     return tsCompareString(alex, alex_len, blex, blex_len, false);
     446             : }
     447             : 
     448             : /*
     449             :  * Internal routine to delete lexemes from TSVector by array of offsets.
     450             :  *
     451             :  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
     452             :  * int indices_count -- size of that array
     453             :  *
     454             :  * Returns new TSVector without given lexemes along with their positions
     455             :  * and weights.
     456             :  */
     457             : static TSVector
     458          40 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
     459             :                            int indices_count)
     460             : {
     461             :     TSVector    tsout;
     462          40 :     WordEntry  *arrin = ARRPTR(tsv),
     463             :                *arrout;
     464          40 :     char       *data = STRPTR(tsv),
     465             :                *dataout;
     466             :     int         i,              /* index in arrin */
     467             :                 j,              /* index in arrout */
     468             :                 k,              /* index in indices_to_delete */
     469             :                 curoff;         /* index in dataout area */
     470             : 
     471             :     /*
     472             :      * Sort the filter array to simplify membership checks below.  Also, get
     473             :      * rid of any duplicate entries, so that we can assume that indices_count
     474             :      * is exactly equal to the number of lexemes that will be removed.
     475             :      */
     476          40 :     if (indices_count > 1)
     477             :     {
     478             :         int         kp;
     479             : 
     480          16 :         qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
     481          16 :         kp = 0;
     482          36 :         for (k = 1; k < indices_count; k++)
     483             :         {
     484          20 :             if (indices_to_delete[k] != indices_to_delete[kp])
     485          16 :                 indices_to_delete[++kp] = indices_to_delete[k];
     486             :         }
     487          16 :         indices_count = ++kp;
     488             :     }
     489             : 
     490             :     /*
     491             :      * Here we overestimate tsout size, since we don't know how much space is
     492             :      * used by the deleted lexeme(s).  We will set exact size below.
     493             :      */
     494          40 :     tsout = (TSVector) palloc0(VARSIZE(tsv));
     495             : 
     496             :     /* This count must be correct because STRPTR(tsout) relies on it. */
     497          40 :     tsout->size = tsv->size - indices_count;
     498             : 
     499             :     /*
     500             :      * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
     501             :      */
     502          40 :     arrout = ARRPTR(tsout);
     503          40 :     dataout = STRPTR(tsout);
     504          40 :     curoff = 0;
     505         240 :     for (i = j = k = 0; i < tsv->size; i++)
     506             :     {
     507             :         /*
     508             :          * If current i is present in indices_to_delete, skip this lexeme.
     509             :          * Since indices_to_delete is already sorted, we only need to check
     510             :          * the current (k'th) entry.
     511             :          */
     512         200 :         if (k < indices_count && i == indices_to_delete[k])
     513             :         {
     514          56 :             k++;
     515          56 :             continue;
     516             :         }
     517             : 
     518             :         /* Copy lexeme and its positions and weights */
     519         144 :         memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
     520         144 :         arrout[j].haspos = arrin[i].haspos;
     521         144 :         arrout[j].len = arrin[i].len;
     522         144 :         arrout[j].pos = curoff;
     523         144 :         curoff += arrin[i].len;
     524         144 :         if (arrin[i].haspos)
     525             :         {
     526         104 :             int         len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
     527             :             + sizeof(uint16);
     528             : 
     529         104 :             curoff = SHORTALIGN(curoff);
     530         208 :             memcpy(dataout + curoff,
     531         104 :                    STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
     532             :                    len);
     533         104 :             curoff += len;
     534             :         }
     535             : 
     536         144 :         j++;
     537             :     }
     538             : 
     539             :     /*
     540             :      * k should now be exactly equal to indices_count. If it isn't then the
     541             :      * caller provided us with indices outside of [0, tsv->size) range and
     542             :      * estimation of tsout's size is wrong.
     543             :      */
     544             :     Assert(k == indices_count);
     545             : 
     546          40 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
     547          40 :     return tsout;
     548             : }
     549             : 
     550             : /*
     551             :  * Delete given lexeme from tsvector.
     552             :  * Implementation of user-level ts_delete(tsvector, text).
     553             :  */
     554             : Datum
     555          24 : tsvector_delete_str(PG_FUNCTION_ARGS)
     556             : {
     557          24 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     558             :                 tsout;
     559          24 :     text       *tlexeme = PG_GETARG_TEXT_PP(1);
     560          24 :     char       *lexeme = VARDATA_ANY(tlexeme);
     561          24 :     int         lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
     562             :                 skip_index;
     563             : 
     564          24 :     if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
     565           8 :         PG_RETURN_POINTER(tsin);
     566             : 
     567          16 :     tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
     568             : 
     569          16 :     PG_FREE_IF_COPY(tsin, 0);
     570          16 :     PG_FREE_IF_COPY(tlexeme, 1);
     571          16 :     PG_RETURN_POINTER(tsout);
     572             : }
     573             : 
     574             : /*
     575             :  * Delete given array of lexemes from tsvector.
     576             :  * Implementation of user-level ts_delete(tsvector, text[]).
     577             :  */
     578             : Datum
     579          28 : tsvector_delete_arr(PG_FUNCTION_ARGS)
     580             : {
     581          28 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     582             :                 tsout;
     583          28 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
     584             :     int         i,
     585             :                 nlex,
     586             :                 skip_count,
     587             :                *skip_indices;
     588             :     Datum      *dlexemes;
     589             :     bool       *nulls;
     590             : 
     591          28 :     deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
     592             :                       &dlexemes, &nulls, &nlex);
     593             : 
     594             :     /*
     595             :      * In typical use case array of lexemes to delete is relatively small. So
     596             :      * here we optimize things for that scenario: iterate through lexarr
     597             :      * performing binary search of each lexeme from lexarr in tsvector.
     598             :      */
     599          28 :     skip_indices = palloc0(nlex * sizeof(int));
     600         104 :     for (i = skip_count = 0; i < nlex; i++)
     601             :     {
     602             :         char       *lex;
     603             :         int         lex_len,
     604             :                     lex_pos;
     605             : 
     606          80 :         if (nulls[i])
     607           4 :             ereport(ERROR,
     608             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     609             :                      errmsg("lexeme array may not contain nulls")));
     610             : 
     611          76 :         lex = VARDATA(dlexemes[i]);
     612          76 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     613          76 :         lex_pos = tsvector_bsearch(tsin, lex, lex_len);
     614             : 
     615          76 :         if (lex_pos >= 0)
     616          52 :             skip_indices[skip_count++] = lex_pos;
     617             :     }
     618             : 
     619          24 :     tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
     620             : 
     621          24 :     pfree(skip_indices);
     622          24 :     PG_FREE_IF_COPY(tsin, 0);
     623          24 :     PG_FREE_IF_COPY(lexemes, 1);
     624             : 
     625          24 :     PG_RETURN_POINTER(tsout);
     626             : }
     627             : 
     628             : /*
     629             :  * Expand a tsvector into a table (one row per lexeme) with the following columns:
     630             :  *     lexeme: lexeme text
     631             :  *     positions: int2 array of lexeme positions (NULL if the lexeme has none)
     632             :  *     weights: text array of one-character weights, parallel to positions
     633             :  */
     634             : Datum
     635         120 : tsvector_unnest(PG_FUNCTION_ARGS)
     636             : {
     637             :     FuncCallContext *funcctx;
     638             :     TSVector    tsin;
     639             : 
     640         120 :     if (SRF_IS_FIRSTCALL())
     641             :     {
     642             :         MemoryContext oldcontext;
     643             :         TupleDesc   tupdesc;
     644             : 
     645          20 :         funcctx = SRF_FIRSTCALL_INIT();
     646          20 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     647             : 
     648          20 :         tupdesc = CreateTemplateTupleDesc(3);
     649          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
     650             :                            TEXTOID, -1, 0);
     651          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
     652             :                            INT2ARRAYOID, -1, 0);
     653          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
     654             :                            TEXTARRAYOID, -1, 0);
     655          20 :         funcctx->tuple_desc = BlessTupleDesc(tupdesc);
     656             : 
     657          20 :         funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);  /* copy is made while multi_call_memory_ctx is current and is reused by every later call */
     658             : 
     659          20 :         MemoryContextSwitchTo(oldcontext);
     660             :     }
     661             : 
     662         120 :     funcctx = SRF_PERCALL_SETUP();
     663         120 :     tsin = (TSVector) funcctx->user_fctx;
     664             : 
     665         120 :     if (funcctx->call_cntr < tsin->size)  /* call number doubles as the index of the entry to emit */
     666             :     {
     667         100 :         WordEntry  *arrin = ARRPTR(tsin);
     668         100 :         char       *data = STRPTR(tsin);
     669             :         HeapTuple   tuple;
     670             :         int         j,
     671         100 :                     i = funcctx->call_cntr;
     672         100 :         bool        nulls[] = {false, false, false};
     673             :         Datum       values[3];
     674             : 
     675         100 :         values[0] = PointerGetDatum(
     676             :                                     cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
     677             :             );
     678             : 
     679         100 :         if (arrin[i].haspos)
     680             :         {
     681             :             WordEntryPosVector *posv;
     682             :             Datum      *positions;
     683             :             Datum      *weights;
     684             :             char        weight;
     685             : 
     686             :             /*
     687             :              * Internally tsvector stores position and weight in the same
     688             :              * uint16 (2 bits for weight, 14 for position). Here we split
     689             :              * them into two separate Datum arrays, one element per position.
     690             :              */
     691          60 :             posv = _POSVECPTR(tsin, arrin + i);
     692          60 :             positions = palloc(posv->npos * sizeof(Datum));
     693          60 :             weights = palloc(posv->npos * sizeof(Datum));
     694         168 :             for (j = 0; j < posv->npos; j++)
     695             :             {
     696         108 :                 positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
     697         108 :                 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);  /* weight code 0 gives 'D', code 3 gives 'A' */
     698         108 :                 weights[j] = PointerGetDatum(
     699             :                                              cstring_to_text_with_len(&weight, 1)
     700             :                     );
     701             :             }
     702             : 
     703          60 :             values[1] = PointerGetDatum(
     704             :                                         construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
     705          60 :             values[2] = PointerGetDatum(
     706             :                                         construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
     707             :         }
     708             :         else
     709             :         {
     710          40 :             nulls[1] = nulls[2] = true;  /* no position data: both array columns are NULL */
     711             :         }
     712             : 
     713         100 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     714         100 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     715             :     }
     716             :     else
     717             :     {
     718          20 :         pfree(tsin);  /* every entry has been emitted; release the copy made on the first call */
     719          20 :         SRF_RETURN_DONE(funcctx);
     720             :     }
     721             : }
     722             : 
     723             : /*
     724             :  * Convert a tsvector to a text array of its lexemes, in stored order; positions and weights are not included.
     725             :  */
     726             : Datum
     727           8 : tsvector_to_array(PG_FUNCTION_ARGS)
     728             : {
     729           8 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     730           8 :     WordEntry  *arrin = ARRPTR(tsin);
     731             :     Datum      *elements;
     732             :     int         i;
     733             :     ArrayType  *array;
     734             : 
     735           8 :     elements = palloc(tsin->size * sizeof(Datum));
     736             : 
     737          48 :     for (i = 0; i < tsin->size; i++)
     738             :     {
     739          40 :         elements[i] = PointerGetDatum(
     740             :                                       cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
     741             :             );
     742             :     }
     743             : 
     744           8 :     array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
     745             : 
     746           8 :     pfree(elements);
     747           8 :     PG_FREE_IF_COPY(tsin, 0);
     748           8 :     PG_RETURN_POINTER(array);
     749             : }
     750             : 
     751             : /*
     752             :  * Build a tsvector from a text array of lexemes.
                     :  *
                     :  * A null array element raises an error.  The lexemes are sorted and
                     :  * duplicates are collapsed; the resulting entries carry no position data.
     753             :  */
     754             : Datum
     755          12 : array_to_tsvector(PG_FUNCTION_ARGS)
     756             : {
     757          12 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
     758             :     TSVector    tsout;
     759             :     Datum      *dlexemes;
     760             :     WordEntry  *arrout;
     761             :     bool       *nulls;
     762             :     int         nitems,
     763             :                 i,
     764             :                 j,
     765             :                 tslen,
     766          12 :                 datalen = 0;
     767             :     char       *cur;
     768             : 
     769          12 :     deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
     770             : 
     771             :     /* Reject nulls (maybe we should just ignore them, instead?) */
     772          64 :     for (i = 0; i < nitems; i++)
     773             :     {
     774          56 :         if (nulls[i])
     775           4 :             ereport(ERROR,
     776             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     777             :                      errmsg("lexeme array may not contain nulls")));
     778             :     }
     779             : 
     780             :     /* Sort and de-dup, because this is required for a valid tsvector. */
     781           8 :     if (nitems > 1)
     782             :     {
     783           8 :         qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
     784           8 :         j = 0;  /* index of the last lexeme kept so far */
     785          36 :         for (i = 1; i < nitems; i++)
     786             :         {
     787          28 :             if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)  /* differs from the last kept one, so keep it too */
     788          24 :                 dlexemes[++j] = dlexemes[i];
     789             :         }
     790           8 :         nitems = ++j;
     791             :     }
     792             : 
     793             :     /* Calculate space needed for surviving lexemes. */
     794          40 :     for (i = 0; i < nitems; i++)
     795          32 :         datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
     796           8 :     tslen = CALCDATASIZE(nitems, datalen);
     797             : 
     798             :     /* Allocate and fill tsvector. */
     799           8 :     tsout = (TSVector) palloc0(tslen);
     800           8 :     SET_VARSIZE(tsout, tslen);
     801           8 :     tsout->size = nitems;
     802             : 
     803           8 :     arrout = ARRPTR(tsout);
     804           8 :     cur = STRPTR(tsout);
     805          40 :     for (i = 0; i < nitems; i++)
     806             :     {
     807          32 :         char       *lex = VARDATA(dlexemes[i]);
     808          32 :         int         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     809             : 
     810          32 :         memcpy(cur, lex, lex_len);
     811          32 :         arrout[i].haspos = 0;  /* entries built from an array never have positions */
     812          32 :         arrout[i].len = lex_len;
     813          32 :         arrout[i].pos = cur - STRPTR(tsout);  /* byte offset of this lexeme from STRPTR(tsout) */
     814          32 :         cur += lex_len;
     815             :     }
     816             : 
     817           8 :     PG_FREE_IF_COPY(v, 0);
     818           8 :     PG_RETURN_POINTER(tsout);
     819             : }
     820             : 
     821             : /*
     822             :  * ts_filter(): keep only lexemes with given weights in tsvector.
                     :  *
                     :  * The second argument is a char array of weight letters (A-D, either
                     :  * case); a null element or any other character raises an error.
                     :  * Positions whose weight was not requested are dropped, and a lexeme is
                     :  * omitted from the result when it has no position data at all or when
                     :  * none of its positions survive.
     823             :  */
     824             : Datum
     825          12 : tsvector_filter(PG_FUNCTION_ARGS)
     826             : {
     827          12 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     828             :                 tsout;
     829          12 :     ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
     830          12 :     WordEntry  *arrin = ARRPTR(tsin),
     831             :                *arrout;
     832          12 :     char       *datain = STRPTR(tsin),
     833             :                *dataout;
     834             :     Datum      *dweights;
     835             :     bool       *nulls;
     836             :     int         nweights;
     837             :     int         i,
     838             :                 j;
     839          12 :     int         cur_pos = 0;
     840          12 :     char        mask = 0;
     841             : 
     842          12 :     deconstruct_array(weights, CHAROID, 1, true, 'c',
     843             :                       &dweights, &nulls, &nweights);
     844             : 
     845          28 :     for (i = 0; i < nweights; i++)
     846             :     {
     847             :         char        char_weight;
     848             : 
     849          20 :         if (nulls[i])
     850           4 :             ereport(ERROR,
     851             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     852             :                      errmsg("weight array may not contain nulls")));
     853             : 
     854          16 :         char_weight = DatumGetChar(dweights[i]);
     855          16 :         switch (char_weight)
     856             :         {
     857             :             case 'A':
     858             :             case 'a':
     859          12 :                 mask = mask | 8;
     860          12 :                 break;
     861             :             case 'B':
     862             :             case 'b':
     863           4 :                 mask = mask | 4;
     864           4 :                 break;
     865             :             case 'C':
     866             :             case 'c':
     867           0 :                 mask = mask | 2;
     868           0 :                 break;
     869             :             case 'D':
     870             :             case 'd':
     871           0 :                 mask = mask | 1;
     872           0 :                 break;
     873             :             default:
     874           0 :                 ereport(ERROR,
     875             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     876             :                          errmsg("unrecognized weight: \"%c\"", char_weight)));
     877             :         }
     878             :     }
     879             : 
     880           8 :     tsout = (TSVector) palloc0(VARSIZE(tsin));  /* filtering only removes data, so the input's size is enough */
     881           8 :     tsout->size = tsin->size;  /* provisional entry count; the real one is stored after the loop */
     882           8 :     arrout = ARRPTR(tsout);
     883           8 :     dataout = STRPTR(tsout);
     884             : 
     885          72 :     for (i = j = 0; i < tsin->size; i++)
     886             :     {
     887             :         WordEntryPosVector *posvin,
     888             :                    *posvout;
     889          64 :         int         npos = 0;
     890             :         int         k;
     891             : 
     892          64 :         if (!arrin[i].haspos)
     893          20 :             continue;
     894             : 
     895          44 :         posvin = _POSVECPTR(tsin, arrin + i);
     896          44 :         posvout = (WordEntryPosVector *)
     897          44 :             (dataout + SHORTALIGN(cur_pos + arrin[i].len));  /* where the position vector goes if this lexeme is kept */
     898             : 
     899          88 :         for (k = 0; k < posvin->npos; k++)
     900             :         {
     901          44 :             if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))  /* mask bit n is set when weight code n was requested (A = 3 ... D = 0) */
     902          20 :                 posvout->pos[npos++] = posvin->pos[k];
     903             :         }
     904             : 
     905             :         /* if no satisfactory positions found, skip lexeme */
     906          44 :         if (!npos)
     907          24 :             continue;
     908             : 
     909          20 :         arrout[j].haspos = true;
     910          20 :         arrout[j].len = arrin[i].len;
     911          20 :         arrout[j].pos = cur_pos;
     912             : 
     913          20 :         memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
     914          20 :         posvout->npos = npos;
     915          20 :         cur_pos += SHORTALIGN(arrin[i].len);
     916          20 :         cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
     917             :             sizeof(uint16);
     918          20 :         j++;
     919             :     }
     920             : 
     921           8 :     tsout->size = j;
     922           8 :     if (dataout != STRPTR(tsout))  /* STRPTR() is re-evaluated with the final entry count; if it moved, slide the data to it */
     923           8 :         memmove(STRPTR(tsout), dataout, cur_pos);
     924             : 
     925           8 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
     926             : 
     927           8 :     PG_FREE_IF_COPY(tsin, 0);
     928           8 :     PG_RETURN_POINTER(tsout);
     929             : }
     930             : /*
                     :  * Concatenate two tsvectors.
                     :  *
                     :  * The entry arrays of in1 and in2 are merged in compareEntry() order; a
                     :  * lexeme found in both inputs produces a single output entry.  Position
                     :  * data from in1 is copied unchanged, while in2's position data is added
                     :  * through add_pos(), which is handed in1's maximum position.  Raises an
                     :  * error if the resulting string area is longer than MAXSTRPOS.
                     :  */
     931             : Datum
     932           8 : tsvector_concat(PG_FUNCTION_ARGS)
     933             : {
     934           8 :     TSVector    in1 = PG_GETARG_TSVECTOR(0);
     935           8 :     TSVector    in2 = PG_GETARG_TSVECTOR(1);
     936             :     TSVector    out;
     937             :     WordEntry  *ptr;
     938             :     WordEntry  *ptr1,
     939             :                *ptr2;
     940             :     WordEntryPos *p;
     941           8 :     int         maxpos = 0,
     942             :                 i,
     943             :                 j,
     944             :                 i1,
     945             :                 i2,
     946             :                 dataoff,
     947             :                 output_bytes,
     948             :                 output_size;
     949             :     char       *data,
     950             :                *data1,
     951             :                *data2;
     952             : 
     953             :     /* Get max position in in1; we'll need this to offset in2's positions */
     954           8 :     ptr = ARRPTR(in1);
     955           8 :     i = in1->size;
     956          28 :     while (i--)
     957             :     {
     958          12 :         if ((j = POSDATALEN(in1, ptr)) != 0)
     959             :         {
     960          12 :             p = POSDATAPTR(in1, ptr);
     961          36 :             while (j--)
     962             :             {
     963          12 :                 if (WEP_GETPOS(*p) > maxpos)
     964           8 :                     maxpos = WEP_GETPOS(*p);
     965          12 :                 p++;
     966             :             }
     967             :         }
     968          12 :         ptr++;
     969             :     }
     970             : 
     971           8 :     ptr1 = ARRPTR(in1);
     972           8 :     ptr2 = ARRPTR(in2);
     973           8 :     data1 = STRPTR(in1);
     974           8 :     data2 = STRPTR(in2);
     975           8 :     i1 = in1->size;  /* i1 and i2 count the entries not yet consumed from each input */
     976           8 :     i2 = in2->size;
     977             : 
     978             :     /*
     979             :      * Conservative estimate of space needed.  We might need all the data in
     980             :      * both inputs, and conceivably add a pad byte before position data for
     981             :      * each item where there was none before.
     982             :      */
     983           8 :     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
     984             : 
     985           8 :     out = (TSVector) palloc0(output_bytes);
     986           8 :     SET_VARSIZE(out, output_bytes);
     987             : 
     988             :     /*
     989             :      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
     990             :      * collapse out any unused space at the end.
     991             :      */
     992           8 :     out->size = in1->size + in2->size;
     993             : 
     994           8 :     ptr = ARRPTR(out);
     995           8 :     data = STRPTR(out);
     996           8 :     dataoff = 0;  /* number of bytes written to the output string area so far */
     997          28 :     while (i1 && i2)
     998             :     {
     999          12 :         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
    1000             : 
    1001          12 :         if (cmp < 0)
    1002             :         {                       /* in1 first */
    1003           4 :             ptr->haspos = ptr1->haspos;
    1004           4 :             ptr->len = ptr1->len;
    1005           4 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1006           4 :             ptr->pos = dataoff;
    1007           4 :             dataoff += ptr1->len;
    1008           4 :             if (ptr->haspos)
    1009             :             {
    1010           4 :                 dataoff = SHORTALIGN(dataoff);
    1011           4 :                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1012           4 :                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1013             :             }
    1014             : 
    1015           4 :             ptr++;
    1016           4 :             ptr1++;
    1017           4 :             i1--;
    1018             :         }
    1019           8 :         else if (cmp > 0)
    1020             :         {                       /* in2 first */
    1021           4 :             ptr->haspos = ptr2->haspos;
    1022           4 :             ptr->len = ptr2->len;
    1023           4 :             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1024           4 :             ptr->pos = dataoff;
    1025           4 :             dataoff += ptr2->len;
    1026           4 :             if (ptr->haspos)
    1027             :             {
    1028           0 :                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1029             : 
    1030           0 :                 if (addlen == 0)
    1031           0 :                     ptr->haspos = 0;  /* add_pos() reported nothing added, so the entry ends up without positions */
    1032             :                 else
    1033             :                 {
    1034           0 :                     dataoff = SHORTALIGN(dataoff);
    1035           0 :                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1036             :                 }
    1037             :             }
    1038             : 
    1039           4 :             ptr++;
    1040           4 :             ptr2++;
    1041           4 :             i2--;
    1042             :         }
    1043             :         else
    1044             :         {                       /* same lexeme in both inputs */
    1045           4 :             ptr->haspos = ptr1->haspos | ptr2->haspos;
    1046           4 :             ptr->len = ptr1->len;
    1047           4 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1048           4 :             ptr->pos = dataoff;
    1049           4 :             dataoff += ptr1->len;
    1050           4 :             if (ptr->haspos)
    1051             :             {
    1052           4 :                 if (ptr1->haspos)
    1053             :                 {
    1054           4 :                     dataoff = SHORTALIGN(dataoff);
    1055           4 :                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1056           4 :                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1057           4 :                     if (ptr2->haspos)
    1058           4 :                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);  /* only the added entries are counted here; the uint16 was already counted with in1's vector */
    1059             :                 }
    1060             :                 else            /* must have ptr2->haspos */
    1061             :                 {
    1062           0 :                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1063             : 
    1064           0 :                     if (addlen == 0)
    1065           0 :                         ptr->haspos = 0;
    1066             :                     else
    1067             :                     {
    1068           0 :                         dataoff = SHORTALIGN(dataoff);
    1069           0 :                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1070             :                     }
    1071             :                 }
    1072             :             }
    1073             : 
    1074           4 :             ptr++;
    1075           4 :             ptr1++;
    1076           4 :             ptr2++;
    1077           4 :             i1--;
    1078           4 :             i2--;
    1079             :         }
    1080             :     }
    1081             : 
    1082          20 :     while (i1)  /* in2 is exhausted: copy the remaining in1 entries */
    1083             :     {
    1084           4 :         ptr->haspos = ptr1->haspos;
    1085           4 :         ptr->len = ptr1->len;
    1086           4 :         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1087           4 :         ptr->pos = dataoff;
    1088           4 :         dataoff += ptr1->len;
    1089           4 :         if (ptr->haspos)
    1090             :         {
    1091           4 :             dataoff = SHORTALIGN(dataoff);
    1092           4 :             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1093           4 :             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1094             :         }
    1095             : 
    1096           4 :         ptr++;
    1097           4 :         ptr1++;
    1098           4 :         i1--;
    1099             :     }
    1100             : 
    1101          20 :     while (i2)  /* in1 is exhausted: copy the remaining in2 entries */
    1102             :     {
    1103           4 :         ptr->haspos = ptr2->haspos;
    1104           4 :         ptr->len = ptr2->len;
    1105           4 :         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1106           4 :         ptr->pos = dataoff;
    1107           4 :         dataoff += ptr2->len;
    1108           4 :         if (ptr->haspos)
    1109             :         {
    1110           4 :             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1111             : 
    1112           4 :             if (addlen == 0)
    1113           0 :                 ptr->haspos = 0;
    1114             :             else
    1115             :             {
    1116           4 :                 dataoff = SHORTALIGN(dataoff);
    1117           4 :                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1118             :             }
    1119             :         }
    1120             : 
    1121           4 :         ptr++;
    1122           4 :         ptr2++;
    1123           4 :         i2--;
    1124             :     }
    1125             : 
    1126             :     /*
    1127             :      * Instead of checking each offset individually, we check for overflow of
    1128             :      * pos fields once at the end.
    1129             :      */
    1130           8 :     if (dataoff > MAXSTRPOS)
    1131           0 :         ereport(ERROR,
    1132             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1133             :                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
    1134             : 
    1135             :     /*
    1136             :      * Adjust sizes (asserting that we didn't overrun the original estimates)
    1137             :      * and collapse out any unused array entries.
    1138             :      */
    1139           8 :     output_size = ptr - ARRPTR(out);
    1140             :     Assert(output_size <= out->size);
    1141           8 :     out->size = output_size;
    1142           8 :     if (data != STRPTR(out))  /* STRPTR() is re-evaluated with the final entry count; if it moved, slide the data to it */
    1143           4 :         memmove(STRPTR(out), data, dataoff);
    1144           8 :     output_bytes = CALCDATASIZE(out->size, dataoff);
    1145             :     Assert(output_bytes <= VARSIZE(out));
    1146           8 :     SET_VARSIZE(out, output_bytes);
    1147             : 
    1148           8 :     PG_FREE_IF_COPY(in1, 0);
    1149           8 :     PG_FREE_IF_COPY(in2, 1);
    1150           8 :     PG_RETURN_POINTER(out);
    1151             : }
    1152             : 
    1153             : /*
    1154             :  * Compare two strings by tsvector rules: bytes are compared with memcmp() over
    1155             :  * the shorter length and, if those are equal, the shorter string sorts first.
    1156             :  * If prefix is true, zero is returned iff a is a prefix of b (an empty a is a prefix of anything).
    1157             :  */
    1158             : int32
    1159     3372842 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
    1160             : {
    1161             :     int         cmp;
    1162             : 
    1163     3372842 :     if (lena == 0)
    1164             :     {
    1165           0 :         if (prefix)
    1166           0 :             cmp = 0;            /* empty string is prefix of anything */
    1167             :         else
    1168           0 :             cmp = (lenb > 0) ? -1 : 0;
    1169             :     }
    1170     3372842 :     else if (lenb == 0)
    1171             :     {
    1172           0 :         cmp = (lena > 0) ? 1 : 0;  /* lena is nonzero on this branch, so nonempty a sorts after empty b */
    1173             :     }
    1174             :     else
    1175             :     {
    1176     3372842 :         cmp = memcmp(a, b, Min(lena, lenb));
    1177             : 
    1178     3372842 :         if (prefix)
    1179             :         {
    1180        6720 :             if (cmp == 0 && lena > lenb)
    1181           0 :                 cmp = 1;        /* a is longer, so not a prefix of b */
    1182             :         }
    1183     3366122 :         else if (cmp == 0 && lena != lenb)
    1184             :         {
    1185       13220 :             cmp = (lena < lenb) ? -1 : 1;  /* equal over the common length: shorter string sorts first */
    1186             :         }
    1187             :     }
    1188             : 
    1189     3372842 :     return cmp;
    1190             : }
    1191             : 
    1192             : /*
    1193             :  * Check weight info and/or fill 'data' with the required positions
                     :  *
                     :  * Returns true outright when the entry has no position data, or when
                     :  * neither a weight restriction (val->weight) nor 'data' was supplied.
                     :  * Otherwise: with a weight restriction, returns true only if at least one
                     :  * position has a matching weight; without one, returns true.  When 'data'
                     :  * is non-NULL it receives the entry's positions (only the matching ones,
                     :  * in a palloc'd array, if val->weight is set).
    1194             :  */
    1195             : static bool
    1196       14692 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
    1197             :                ExecPhraseData *data)
    1198             : {
    1199       14692 :     bool        result = false;
    1200             : 
    1201       14692 :     if (entry->haspos && (val->weight || data))
    1202         544 :     {
    1203             :         WordEntryPosVector *posvec;
    1204             : 
    1205             :         /*
    1206             :          * We can't use the _POSVECPTR macro here because the pointer to the
    1207             :          * tsvector's lexeme storage is already contained in chkval->values.
    1208             :          */
    1209         544 :         posvec = (WordEntryPosVector *)
    1210         544 :             (chkval->values + SHORTALIGN(entry->pos + entry->len));
    1211             : 
    1212         544 :         if (val->weight && data)
    1213           0 :         {
    1214           0 :             WordEntryPos *posvec_iter = posvec->pos;
    1215             :             WordEntryPos *dptr;
    1216             : 
    1217             :             /*
    1218             :              * Filter position information by weights
    1219             :              */
    1220           0 :             dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
    1221           0 :             data->allocated = true;  /* data->pos is a fresh palloc'd array here, unlike the data-only branch below */
    1222             : 
    1223             :             /* Is there a position with a matching weight? */
    1224           0 :             while (posvec_iter < posvec->pos + posvec->npos)
    1225             :             {
    1226             :                 /* If true, append this position to the data->pos */
    1227           0 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1228             :                 {
    1229           0 :                     *dptr = WEP_GETPOS(*posvec_iter);
    1230           0 :                     dptr++;
    1231             :                 }
    1232             : 
    1233           0 :                 posvec_iter++;
    1234             :             }
    1235             : 
    1236           0 :             data->npos = dptr - data->pos;
    1237             : 
    1238           0 :             if (data->npos > 0)
    1239           0 :                 result = true;
    1240             :         }
    1241         544 :         else if (val->weight)
    1242             :         {
    1243          84 :             WordEntryPos *posvec_iter = posvec->pos;
    1244             : 
    1245             :             /* Is there a position with a matching weight? */
    1246         216 :             while (posvec_iter < posvec->pos + posvec->npos)
    1247             :             {
    1248         116 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1249             :                 {
    1250          68 :                     result = true;
    1251          68 :                     break;      /* no need to go further */
    1252             :                 }
    1253             : 
    1254          48 :                 posvec_iter++;
    1255             :             }
    1256             :         }
    1257             :         else                    /* data != NULL */
    1258             :         {
    1259         460 :             data->npos = posvec->npos;
    1260         460 :             data->pos = posvec->pos;  /* points into the tsvector's own storage; nothing is copied */
    1261         460 :             data->allocated = false;
    1262         460 :             result = true;
    1263             :         }
    1264             :     }
    1265             :     else
    1266             :     {
    1267       14148 :         result = true;
    1268             :     }
    1269             : 
    1270       14692 :     return result;
    1271             : }
    1272             : 
    1273             : /*
    1274             :  * Removes duplicate pos entries. We can't use uniquePos() from
    1275             :  * tsvector.c because array might be longer than MAXENTRYPOS
    1276             :  *
                     :  * The array is first sorted with compareWordEntryPos() (defined elsewhere;
                     :  * the compaction below relies on it placing equal positions next to each
                     :  * other) and then compacted in place: an entry is kept only if its
                     :  * WEP_GETPOS() value differs from that of the most recently kept entry.
                     :  * Every kept entry except the first is stored as its WEP_GETPOS() value;
                     :  * the first entry is left exactly as the sort placed it.
                     :  *
                     :  * pos: array to deduplicate (reordered and overwritten in place)
                     :  * npos: number of entries in pos
                     :  *
    1277             :  * Returns new length.  Arrays of zero or one entry are returned untouched.
    1278             :  */
    1279             : static int
    1280           8 : uniqueLongPos(WordEntryPos *pos, int npos)
    1281             : {
    1282             :     WordEntryPos *pos_iter,
    1283             :                *result;
    1284             : 
    1285           8 :     if (npos <= 1)
    1286           4 :         return npos;
    1287             : 
    1288           4 :     qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
    1289             : 
    1290           4 :     result = pos;               /* last entry kept so far */
    1291           4 :     pos_iter = pos + 1;         /* next candidate to examine */
    1292          12 :     while (pos_iter < pos + npos)
    1293             :     {
    1294           4 :         if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
    1295             :         {
    1296           4 :             result++;
    1297           4 :             *result = WEP_GETPOS(*pos_iter);
    1298             :         }
    1299             : 
    1300           4 :         pos_iter++;
    1301             :     }
    1302             : 
    1303           4 :     return result + 1 - pos;    /* count of entries in pos[0 .. result] */
    1304             : }
    1305             : 
    1306             : /*
    1307             :  * is there value 'val' in array or not ?
                     :  *
                     :  * checkval: a CHKVAL; its WordEntry array [arrb, arre) is binary-searched
                     :  *      for the operand, with lexeme text taken from chkval->values and
                     :  *      operand text taken from chkval->operand
                     :  * val: query operand to look for; val->prefix requests prefix matching
                     :  * data: NULL, or struct to be filled with the positions of the match
                     :  *
                     :  * An exact match is tried first.  For a prefix operand, the entries from
                     :  * the binary search's stopping point onward that tsCompareString() reports
                     :  * as prefix matches are then examined: without "data" the scan stops at
                     :  * the first entry accepted by checkclass_str(); with "data" it visits all
                     :  * of them and returns the sorted, de-duplicated union of the accepted
                     :  * entries' positions in a newly palloc'd array.
                     :  *
                     :  * Returns true if at least one entry was accepted by checkclass_str().
    1308             :  */
    1309             : static bool
    1310       49968 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
    1311             : {
    1312       49968 :     CHKVAL     *chkval = (CHKVAL *) checkval;
    1313       49968 :     WordEntry  *StopLow = chkval->arrb;
    1314       49968 :     WordEntry  *StopHigh = chkval->arre;
    1315       49968 :     WordEntry  *StopMiddle = StopHigh;
    1316       49968 :     int         difference = -1;
    1317       49968 :     bool        res = false;
    1318             : 
    1319             :     /* Loop invariant: StopLow <= val < StopHigh */
    1320      363396 :     while (StopLow < StopHigh)
    1321             :     {
    1322      272156 :         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
    1323      816468 :         difference = tsCompareString(chkval->operand + val->distance,
    1324      272156 :                                      val->length,
    1325      272156 :                                      chkval->values + StopMiddle->pos,
    1326      272156 :                                      StopMiddle->len,
    1327             :                                      false);
    1328             : 
    1329      272156 :         if (difference == 0)
    1330             :         {
    1331             :             /* Check weight info & fill 'data' with positions */
    1332        8696 :             res = checkclass_str(chkval, StopMiddle, val, data);
    1333        8696 :             break;
    1334             :         }
    1335      263460 :         else if (difference > 0)
    1336      140908 :             StopLow = StopMiddle + 1;
    1337             :         else
    1338      122552 :             StopHigh = StopMiddle;
    1339             :     }
    1340             : 
    1341       49968 :     if ((!res || data) && val->prefix)
    1342             :     {
    1343        6608 :         WordEntryPos *allpos = NULL;
    1344        6608 :         int         npos = 0,
    1345        6608 :                     totalpos = 0;
    1346             : 
    1347             :         /*
    1348             :          * there was a failed exact search, so we should scan further to find
    1349             :          * a prefix match. We also need to do so if caller needs position info
    1350             :          */
    1351        6608 :         if (StopLow >= StopHigh)
    1352        6600 :             StopMiddle = StopHigh;
    1353             : 
    1354       25580 :         while ((!res || data) && StopMiddle < chkval->arre &&
    1355       19104 :                tsCompareString(chkval->operand + val->distance,
    1356        6368 :                                val->length,
    1357        6368 :                                chkval->values + StopMiddle->pos,
    1358        6368 :                                StopMiddle->len,
    1359             :                                true) == 0)
    1360             :         {
    1361        5996 :             if (data)
    1362             :             {
                     :                 bool        entry_match;
                     : 
    1363             :                 /*
    1364             :                  * We need to join position information
                     :                  *
                     :                  * This loop keeps running after a hit when "data" is wanted,
                     :                  * so each entry's outcome is tracked separately from "res".
                     :                  * Otherwise a later entry rejected by checkclass_str() would
                     :                  * turn an already-found match back into "no match" and the
                     :                  * positions gathered in allpos would be thrown away.
    1365             :                  */
                     :                 entry_match = checkclass_str(chkval, StopMiddle, val, data);
    1367             : 
                     :                 if (entry_match)
    1369             :                 {
                     :                     res = true;
                     : 
                     :                     /* grow allpos until this entry's positions fit */
    1370          32 :                     while (npos + data->npos >= totalpos)
    1371             :                     {
    1372           8 :                         if (totalpos == 0)
    1373             :                         {
    1374           8 :                             totalpos = 256;
    1375           8 :                             allpos = palloc(sizeof(WordEntryPos) * totalpos);
    1376             :                         }
    1377             :                         else
    1378             :                         {
    1379           0 :                             totalpos *= 2;
    1380           0 :                             allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
    1381             :                         }
    1382             :                     }
    1383             : 
    1384          12 :                     memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
    1385          12 :                     npos += data->npos;
    1386             :                 }
    1387             :             }
    1388             :             else
    1389             :             {
                     :                 /* no positions wanted: the loop ends once this is true */
    1390        5984 :                 res = checkclass_str(chkval, StopMiddle, val, NULL);
    1391             :             }
    1392             : 
    1393        5996 :             StopMiddle++;
    1394             :         }
    1395             : 
    1396        6608 :         if (res && data)
    1397             :         {
    1398             :             /* Sort and make unique array of found positions */
    1399           8 :             data->pos = allpos;
    1400           8 :             data->npos = uniqueLongPos(allpos, npos);
    1401           8 :             data->allocated = true;
    1402             :         }
    1403             :     }
    1404             : 
    1405       49968 :     return res;
    1406             : }
    1407             : 
    1408             : /*
    1409             :  * Compute output position list for a tsquery operator in phrase mode.
    1410             :  *
    1411             :  * Merge the position lists in Ldata and Rdata as specified by "emit",
    1412             :  * returning the result list into *data.  The input position lists must be
    1413             :  * sorted and unique, and the output will be as well.
    1414             :  *
    1415             :  * data: pointer to initially-all-zeroes output struct, or NULL
    1416             :  * Ldata, Rdata: input position lists
    1417             :  * emit: bitmask of TSPO_XXX flags
    1418             :  * Loffset: offset to be added to Ldata positions before comparing/outputting
    1419             :  * Roffset: offset to be added to Rdata positions before comparing/outputting
    1420             :  * max_npos: maximum possible required size of output position array
    1421             :  *
    1422             :  * Loffset and Roffset should not be negative, else we risk trying to output
    1423             :  * negative positions, which won't fit into WordEntryPos.
    1424             :  *
                     :  * The output array is palloc'd (max_npos entries) the first time a position
                     :  * has to be stored, and positions are appended at data->npos; that is why
                     :  * *data must arrive zeroed.  Only offset-adjusted positions greater than
                     :  * zero are emitted.
                     :  *
    1425             :  * Returns true if any positions were emitted to *data; or if data is NULL,
    1426             :  * returns true if any positions would have been emitted.
    1427             :  */
    1428             : #define TSPO_L_ONLY     0x01    /* emit positions appearing only in L */
    1429             : #define TSPO_R_ONLY     0x02    /* emit positions appearing only in R */
    1430             : #define TSPO_BOTH       0x04    /* emit positions appearing in both L&R */
    1431             : 
    1432             : static bool
    1433         436 : TS_phrase_output(ExecPhraseData *data,
    1434             :                  ExecPhraseData *Ldata,
    1435             :                  ExecPhraseData *Rdata,
    1436             :                  int emit,
    1437             :                  int Loffset,
    1438             :                  int Roffset,
    1439             :                  int max_npos)
    1440             : {
    1441             :     int         Lindex,
    1442             :                 Rindex;
    1443             : 
    1444             :     /* Loop until both inputs are exhausted */
    1445         436 :     Lindex = Rindex = 0;
    1446        1168 :     while (Lindex < Ldata->npos || Rindex < Rdata->npos)
    1447             :     {
    1448             :         int         Lpos,
    1449             :                     Rpos;
    1450         584 :         int         output_pos = 0; /* 0 means "emit nothing this round" */
    1451             : 
    1452             :         /*
    1453             :          * Fetch current values to compare.  WEP_GETPOS() is needed because
    1454             :          * ExecPhraseData->pos can point to a tsvector's WordEntryPosVector.
    1455             :          */
    1456         584 :         if (Lindex < Ldata->npos)
    1457         492 :             Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
    1458             :         else
    1459             :         {
    1460             :             /* L array exhausted, so we're done if R_ONLY isn't set */
    1461          92 :             if (!(emit & TSPO_R_ONLY))
    1462          44 :                 break;
    1463          48 :             Lpos = INT_MAX;     /* sentinel: lets the remaining R entries drain */
    1464             :         }
    1465         540 :         if (Rindex < Rdata->npos)
    1466         444 :             Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
    1467             :         else
    1468             :         {
    1469             :             /* R array exhausted, so we're done if L_ONLY isn't set */
    1470          96 :             if (!(emit & TSPO_L_ONLY))
    1471          44 :                 break;
    1472          52 :             Rpos = INT_MAX;     /* sentinel: lets the remaining L entries drain */
    1473             :         }
    1474             : 
    1475             :         /* Merge-join the two input lists */
    1476         496 :         if (Lpos < Rpos)
    1477             :         {
    1478             :             /* Lpos is not matched in Rdata, should we output it? */
    1479         136 :             if (emit & TSPO_L_ONLY)
    1480          68 :                 output_pos = Lpos;
    1481         136 :             Lindex++;
    1482             :         }
    1483         360 :         else if (Lpos == Rpos)
    1484             :         {
    1485             :             /* Lpos and Rpos match ... should we output it? */
    1486         260 :             if (emit & TSPO_BOTH)
    1487         252 :                 output_pos = Rpos;
    1488         260 :             Lindex++;
    1489         260 :             Rindex++;
    1490             :         }
    1491             :         else                    /* Lpos > Rpos */
    1492             :         {
    1493             :             /* Rpos is not matched in Ldata, should we output it? */
    1494         100 :             if (emit & TSPO_R_ONLY)
    1495          48 :                 output_pos = Rpos;
    1496         100 :             Rindex++;
    1497             :         }
    1498             : 
    1499         496 :         if (output_pos > 0)
    1500             :         {
    1501         368 :             if (data)
    1502             :             {
    1503             :                 /* Store position, first allocating output array if needed */
    1504         168 :                 if (data->pos == NULL)
    1505             :                 {
    1506         136 :                     data->pos = (WordEntryPos *)
    1507         136 :                         palloc(max_npos * sizeof(WordEntryPos));
    1508         136 :                     data->allocated = true;
    1509             :                 }
    1510         168 :                 data->pos[data->npos++] = output_pos;
    1511             :             }
    1512             :             else
    1513             :             {
    1514             :                 /*
    1515             :                  * Exact positions not needed, so return true as soon as we
    1516             :                  * know there is at least one.
    1517             :                  */
    1518         200 :                 return true;
    1519             :             }
    1520             :         }
    1521             :     }
    1522             : 
    1523         236 :     if (data && data->npos > 0)
    1524             :     {
    1525             :         /* Let's assert we didn't overrun the array */
    1526             :         Assert(data->npos <= max_npos);
    1527         136 :         return true;
    1528             :     }
    1529         100 :     return false;
    1530             : }
    1531             : 
    1532             : /*
    1533             :  * Execute tsquery at or below an OP_PHRASE operator.
    1534             :  *
    1535             :  * This handles tsquery execution at recursion levels where we need to care
    1536             :  * about match locations.
    1537             :  *
    1538             :  * In addition to the same arguments used for TS_execute, the caller may pass
    1539             :  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
    1540             :  * match position info on success.  data == NULL if no position data need be
    1541             :  * returned.  (In practice, outside callers pass NULL, and only the internal
    1542             :  * recursion cases pass a data pointer.)
    1543             :  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
    1544             :  * This is OK because an outside call always starts from an OP_PHRASE node.
                     :  * (The OP_NOT and OP_OR cases dereference data unconditionally; the shared
                     :  * OP_PHRASE/OP_AND case tests data before writing through it.)
                     :  *
                     :  * curitem: query item to evaluate; a QI_VAL item is handed straight to
                     :  *      chkcond, otherwise curitem->qoperator.oper selects the case below
                     :  * arg: opaque value passed through to chkcond
                     :  * flags: only TS_EXEC_PHRASE_NO_POS is examined here; the value is passed
                     :  *      unchanged to recursive calls
                     :  * chkcond: callback that tests one operand and may fill position data
                     :  * data: output struct as described above, or NULL
    1545             :  *
    1546             :  * The detailed semantics of the match data, given that the function returned
    1547             :  * "true" (successful match, or possible match), are:
    1548             :  *
    1549             :  * npos > 0, negate = false:
    1550             :  *   query is matched at specified position(s) (and only those positions)
    1551             :  * npos > 0, negate = true:
    1552             :  *   query is matched at all positions *except* specified position(s)
    1553             :  * npos = 0, negate = false:
    1554             :  *   query is possibly matched, matching position(s) are unknown
    1555             :  *   (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set)
    1556             :  * npos = 0, negate = true:
    1557             :  *   query is matched at all positions
    1558             :  *
    1559             :  * Successful matches also return a "width" value which is the match width in
    1560             :  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
    1561             :  * and is the sum of the phrase operator distances for phrase matches.  Note
    1562             :  * that when width > 0, the listed positions represent the ends of matches not
    1563             :  * the starts.  (This unintuitive rule is needed to avoid possibly generating
    1564             :  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
    1565             :  *
    1566             :  * When the function returns "false" (no match), it must return npos = 0,
    1567             :  * negate = false (which is the state initialized by the caller); but the
    1568             :  * "width" output in such cases is undefined.
    1569             :  */
    1570             : static bool
    1571        1748 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
    1572             :                   TSExecuteCallback chkcond,
    1573             :                   ExecPhraseData *data)
    1574             : {
    1575             :     ExecPhraseData Ldata,
    1576             :                 Rdata;
    1577             :     bool        lmatch,
    1578             :                 rmatch;
    1579             :     int         Loffset,
    1580             :                 Roffset,
    1581             :                 maxwidth;
    1582             : 
    1583             :     /* since this function recurses, it could be driven to stack overflow */
    1584        1748 :     check_stack_depth();
    1585             : 
    1586        1748 :     if (curitem->type == QI_VAL)
    1587        1020 :         return chkcond(arg, (QueryOperand *) curitem, data);
    1588             : 
    1589         728 :     switch (curitem->qoperator.oper)
    1590             :     {
    1591             :         case OP_NOT:
    1592             : 
    1593             :             /*
    1594             :              * Because a "true" result with no specific positions is taken as
    1595             :              * uncertain, we need no special care here for !TS_EXEC_CALC_NOT.
    1596             :              * If it's a false positive, the right things happen anyway.
    1597             :              *
    1598             :              * Also, we need not touch data->width, since a NOT operation does
    1599             :              * not change the match width.
    1600             :              */
    1601          24 :             if (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
    1602             :             {
    1603          12 :                 if (data->npos > 0)
    1604             :                 {
    1605             :                     /* we have some positions, invert negate flag */
    1606          12 :                     data->negate = !data->negate;
    1607          12 :                     return true;
    1608             :                 }
    1609           0 :                 else if (data->negate)
    1610             :                 {
    1611             :                     /* change "match everywhere" to "match nowhere" */
    1612           0 :                     data->negate = false;
    1613           0 :                     return false;
    1614             :                 }
    1615             :                 /* match positions are, and remain, uncertain */
    1616           0 :                 return true;
    1617             :             }
    1618             :             else
    1619             :             {
    1620             :                 /* change "match nowhere" to "match everywhere" */
    1621             :                 Assert(data->npos == 0 && !data->negate);
    1622          12 :                 data->negate = true;
    1623          12 :                 return true;
    1624             :             }
    1625             : 
    1626             :         case OP_PHRASE:
    1627             :         case OP_AND:
    1628         616 :             memset(&Ldata, 0, sizeof(Ldata));
    1629         616 :             memset(&Rdata, 0, sizeof(Rdata));
    1630             : 
    1631         616 :             if (!TS_phrase_execute(curitem + curitem->qoperator.left,
    1632             :                                    arg, flags, chkcond, &Ldata))
    1633         156 :                 return false;
    1634             : 
    1635         460 :             if (!TS_phrase_execute(curitem + 1,
    1636             :                                    arg, flags, chkcond, &Rdata))
    1637          96 :                 return false;
    1638             : 
    1639             :             /*
    1640             :              * If either operand has no position information, then we can't
    1641             :              * return position data, only a "possible match" result. "Possible
    1642             :              * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
    1643             :              * is set, otherwise return false.
    1644             :              */
    1645         720 :             if ((Ldata.npos == 0 && !Ldata.negate) ||
    1646         360 :                 (Rdata.npos == 0 && !Rdata.negate))
    1647           8 :                 return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
    1648             : 
    1649         356 :             if (curitem->qoperator.oper == OP_PHRASE)
    1650             :             {
    1651             :                 /*
    1652             :                  * Compute Loffset and Roffset suitable for phrase match, and
    1653             :                  * compute overall width of whole phrase match.
                     :                  * Shifting L by distance + Rdata.width makes an L match
                     :                  * compare equal to the position reported for the R match
                     :                  * that follows it at the required distance.
    1654             :                  */
    1655         352 :                 Loffset = curitem->qoperator.distance + Rdata.width;
    1656         352 :                 Roffset = 0;
    1657         352 :                 if (data)
    1658         192 :                     data->width = curitem->qoperator.distance +
    1659         128 :                         Ldata.width + Rdata.width;
    1660             :             }
    1661             :             else
    1662             :             {
    1663             :                 /*
    1664             :                  * For OP_AND, set output width and alignment like OP_OR (see
    1665             :                  * comment below)
    1666             :                  */
    1667           4 :                 maxwidth = Max(Ldata.width, Rdata.width);
    1668           4 :                 Loffset = maxwidth - Ldata.width;
    1669           4 :                 Roffset = maxwidth - Rdata.width;
    1670           4 :                 if (data)
    1671           4 :                     data->width = maxwidth;
    1672             :             }
    1673             : 
    1674         356 :             if (Ldata.negate && Rdata.negate)
    1675             :             {
    1676             :                 /* !L & !R: treat as !(L | R) */
    1677           0 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1678             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1679             :                                         Loffset, Roffset,
    1680           0 :                                         Ldata.npos + Rdata.npos);
    1681           0 :                 if (data)
    1682           0 :                     data->negate = true;
    1683           0 :                 return true;
    1684             :             }
    1685         356 :             else if (Ldata.negate)
    1686             :             {
    1687             :                 /* !L & R */
    1688          20 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1689             :                                         TSPO_R_ONLY,
    1690             :                                         Loffset, Roffset,
    1691             :                                         Rdata.npos);
    1692             :             }
    1693         336 :             else if (Rdata.negate)
    1694             :             {
    1695             :                 /* L & !R */
    1696           4 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1697             :                                         TSPO_L_ONLY,
    1698             :                                         Loffset, Roffset,
    1699             :                                         Ldata.npos);
    1700             :             }
    1701             :             else
    1702             :             {
    1703             :                 /* straight AND */
    1704         332 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1705             :                                         TSPO_BOTH,
    1706             :                                         Loffset, Roffset,
    1707         332 :                                         Min(Ldata.npos, Rdata.npos));
    1708             :             }
    1709             : 
    1710             :         case OP_OR:
    1711          88 :             memset(&Ldata, 0, sizeof(Ldata));
    1712          88 :             memset(&Rdata, 0, sizeof(Rdata));
    1713             : 
                     :             /* both sides are always evaluated; OR needs both position lists */
    1714          88 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1715             :                                        arg, flags, chkcond, &Ldata);
    1716          88 :             rmatch = TS_phrase_execute(curitem + 1,
    1717             :                                        arg, flags, chkcond, &Rdata);
    1718             : 
    1719          88 :             if (!lmatch && !rmatch)
    1720           8 :                 return false;
    1721             : 
    1722             :             /*
    1723             :              * If a valid operand has no position information, then we can't
    1724             :              * return position data, only a "possible match" result. "Possible
    1725             :              * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
    1726             :              * is set, otherwise return false.
    1727             :              */
    1728          80 :             if ((lmatch && Ldata.npos == 0 && !Ldata.negate) ||
    1729          28 :                 (rmatch && Rdata.npos == 0 && !Rdata.negate))
    1730           0 :                 return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
    1731             : 
    1732             :             /*
    1733             :              * Cope with undefined output width from failed submatch.  (This
    1734             :              * takes less code than trying to ensure that all failure returns
    1735             :              * set data->width to zero.)
    1736             :              */
    1737          80 :             if (!lmatch)
    1738          12 :                 Ldata.width = 0;
    1739          80 :             if (!rmatch)
    1740          52 :                 Rdata.width = 0;
    1741             : 
    1742             :             /*
    1743             :              * For OP_AND and OP_OR, report the width of the wider of the two
    1744             :              * inputs, and align the narrower input's positions to the right
    1745             :              * end of that width.  This rule deals at least somewhat
    1746             :              * reasonably with cases like "x <-> (y | z <-> q)".
    1747             :              */
    1748          80 :             maxwidth = Max(Ldata.width, Rdata.width);
    1749          80 :             Loffset = maxwidth - Ldata.width;
    1750          80 :             Roffset = maxwidth - Rdata.width;
    1751          80 :             data->width = maxwidth;
    1752             : 
    1753          80 :             if (Ldata.negate && Rdata.negate)
    1754             :             {
    1755             :                 /* !L | !R: treat as !(L & R) */
    1756           0 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1757             :                                         TSPO_BOTH,
    1758             :                                         Loffset, Roffset,
    1759           0 :                                         Min(Ldata.npos, Rdata.npos));
    1760           0 :                 data->negate = true;
    1761           0 :                 return true;
    1762             :             }
    1763          80 :             else if (Ldata.negate)
    1764             :             {
    1765             :                 /* !L | R: treat as !(L & !R) */
    1766          16 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1767             :                                         TSPO_L_ONLY,
    1768             :                                         Loffset, Roffset,
    1769             :                                         Ldata.npos);
    1770          16 :                 data->negate = true;
    1771          16 :                 return true;
    1772             :             }
    1773          64 :             else if (Rdata.negate)
    1774             :             {
    1775             :                 /* L | !R: treat as !(!L & R) */
    1776           0 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1777             :                                         TSPO_R_ONLY,
    1778             :                                         Loffset, Roffset,
    1779             :                                         Rdata.npos);
    1780           0 :                 data->negate = true;
    1781           0 :                 return true;
    1782             :             }
    1783             :             else
    1784             :             {
    1785             :                 /* straight OR */
    1786          64 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1787             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1788             :                                         Loffset, Roffset,
    1789          64 :                                         Ldata.npos + Rdata.npos);
    1790             :             }
    1791             : 
    1792             :         default:
    1793           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1794             :     }
    1795             : 
    1796             :     /* not reachable, but keep compiler quiet */
    1797             :     return false;
    1798             : }
    1799             : 
    1800             : 
    1801             : /*
    1802             :  * Evaluate tsquery boolean expression.
    1803             :  *
    1804             :  * curitem: current tsquery item (initially, the first one)
    1805             :  * arg: opaque value to pass through to callback function
    1806             :  * flags: bitmask of flag bits shown in ts_utils.h
    1807             :  * chkcond: callback function to check whether a primitive value is present
    1808             :  *
    1809             :  * The logic here deals only with operators above any phrase operator, for
    1810             :  * which we do not need to worry about lexeme positions.  As soon as we hit an
    1811             :  * OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
    1812             :  */
    1813             : bool
    1814      192158 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
    1815             :            TSExecuteCallback chkcond)
    1816             : {
    1817             :     /* since this function recurses, it could be driven to stack overflow */
    1818      192158 :     check_stack_depth();
    1819             : 
    1820      192158 :     if (curitem->type == QI_VAL)
    1821      109378 :         return chkcond(arg, (QueryOperand *) curitem,
    1822             :                        NULL /* we don't need position info */ );
    1823             : 
    1824       82780 :     switch (curitem->qoperator.oper)
    1825             :     {
    1826             :         case OP_NOT:
    1827       10320 :             if (flags & TS_EXEC_CALC_NOT)
    1828       10168 :                 return !TS_execute(curitem + 1, arg, flags, chkcond);
    1829             :             else
    1830         152 :                 return true;
    1831             : 
    1832             :         case OP_AND:
    1833       32388 :             if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
    1834        5968 :                 return TS_execute(curitem + 1, arg, flags, chkcond);
    1835             :             else
    1836       26420 :                 return false;
    1837             : 
    1838             :         case OP_OR:
    1839       39600 :             if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
    1840       14796 :                 return true;
    1841             :             else
    1842       24804 :                 return TS_execute(curitem + 1, arg, flags, chkcond);
    1843             : 
    1844             :         case OP_PHRASE:
    1845         472 :             return TS_phrase_execute(curitem, arg, flags, chkcond, NULL);
    1846             : 
    1847             :         default:
    1848           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1849             :     }
    1850             : 
    1851             :     /* not reachable, but keep compiler quiet */
    1852             :     return false;
    1853             : }
    1854             : 
    1855             : /*
    1856             :  * Detect whether a tsquery boolean expression requires any positive matches
    1857             :  * to values shown in the tsquery.
    1858             :  *
    1859             :  * This is needed to know whether a GIN index search requires full index scan.
    1860             :  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
    1861             :  * entries for x; but 'x | !y' could match rows containing neither x nor y.
    1862             :  */
    1863             : bool
    1864         320 : tsquery_requires_match(QueryItem *curitem)
    1865             : {
    1866             :     /* since this function recurses, it could be driven to stack overflow */
    1867         320 :     check_stack_depth();
    1868             : 
    1869         320 :     if (curitem->type == QI_VAL)
    1870         188 :         return true;
    1871             : 
    1872         132 :     switch (curitem->qoperator.oper)
    1873             :     {
    1874             :         case OP_NOT:
    1875             : 
    1876             :             /*
    1877             :              * Assume there are no required matches underneath a NOT.  For
    1878             :              * some cases with nested NOTs, we could prove there's a required
    1879             :              * match, but it seems unlikely to be worth the trouble.
    1880             :              */
    1881           8 :             return false;
    1882             : 
    1883             :         case OP_PHRASE:
    1884             : 
    1885             :             /*
    1886             :              * Treat OP_PHRASE as OP_AND here
    1887             :              */
    1888             :         case OP_AND:
    1889             :             /* If either side requires a match, we're good */
    1890          80 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    1891          80 :                 return true;
    1892             :             else
    1893           0 :                 return tsquery_requires_match(curitem + 1);
    1894             : 
    1895             :         case OP_OR:
    1896             :             /* Both sides must require a match */
    1897          44 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    1898          44 :                 return tsquery_requires_match(curitem + 1);
    1899             :             else
    1900           0 :                 return false;
    1901             : 
    1902             :         default:
    1903           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1904             :     }
    1905             : 
    1906             :     /* not reachable, but keep compiler quiet */
    1907             :     return false;
    1908             : }
    1909             : 
    1910             : /*
    1911             :  * boolean operations
    1912             :  */
    1913             : Datum
    1914          40 : ts_match_qv(PG_FUNCTION_ARGS)
    1915             : {
    1916          40 :     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
    1917             :                                         PG_GETARG_DATUM(1),
    1918             :                                         PG_GETARG_DATUM(0)));
    1919             : }
    1920             : 
    1921             : Datum
    1922       37924 : ts_match_vq(PG_FUNCTION_ARGS)
    1923             : {
    1924       37924 :     TSVector    val = PG_GETARG_TSVECTOR(0);
    1925       37924 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    1926             :     CHKVAL      chkval;
    1927             :     bool        result;
    1928             : 
    1929             :     /* empty query matches nothing */
    1930       37924 :     if (!query->size)
    1931             :     {
    1932           0 :         PG_FREE_IF_COPY(val, 0);
    1933           0 :         PG_FREE_IF_COPY(query, 1);
    1934           0 :         PG_RETURN_BOOL(false);
    1935             :     }
    1936             : 
    1937       37924 :     chkval.arrb = ARRPTR(val);
    1938       37924 :     chkval.arre = chkval.arrb + val->size;
    1939       37924 :     chkval.values = STRPTR(val);
    1940       37924 :     chkval.operand = GETOPERAND(query);
    1941       37924 :     result = TS_execute(GETQUERY(query),
    1942             :                         &chkval,
    1943             :                         TS_EXEC_CALC_NOT,
    1944             :                         checkcondition_str);
    1945             : 
    1946       37924 :     PG_FREE_IF_COPY(val, 0);
    1947       37924 :     PG_FREE_IF_COPY(query, 1);
    1948       37924 :     PG_RETURN_BOOL(result);
    1949             : }
    1950             : 
    1951             : Datum
    1952           0 : ts_match_tt(PG_FUNCTION_ARGS)
    1953             : {
    1954             :     TSVector    vector;
    1955             :     TSQuery     query;
    1956             :     bool        res;
    1957             : 
    1958           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    1959             :                                                   PG_GETARG_DATUM(0)));
    1960           0 :     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
    1961             :                                                 PG_GETARG_DATUM(1)));
    1962             : 
    1963           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    1964             :                                            TSVectorGetDatum(vector),
    1965             :                                            TSQueryGetDatum(query)));
    1966             : 
    1967           0 :     pfree(vector);
    1968           0 :     pfree(query);
    1969             : 
    1970           0 :     PG_RETURN_BOOL(res);
    1971             : }
    1972             : 
    1973             : Datum
    1974           0 : ts_match_tq(PG_FUNCTION_ARGS)
    1975             : {
    1976             :     TSVector    vector;
    1977           0 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    1978             :     bool        res;
    1979             : 
    1980           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    1981             :                                                   PG_GETARG_DATUM(0)));
    1982             : 
    1983           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    1984             :                                            TSVectorGetDatum(vector),
    1985             :                                            TSQueryGetDatum(query)));
    1986             : 
    1987           0 :     pfree(vector);
    1988           0 :     PG_FREE_IF_COPY(query, 1);
    1989             : 
    1990           0 :     PG_RETURN_BOOL(res);
    1991             : }
    1992             : 
    1993             : /*
    1994             :  * ts_stat statistic function support
    1995             :  */
    1996             : 
    1997             : 
    1998             : /*
    1999             :  * Returns the number of positions in value 'wptr' within tsvector 'txt',
    2000             :  * that have a weight equal to one of the weights in 'weight' bitmask.
    2001             :  */
    2002             : static int
    2003           4 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
    2004             : {
    2005           4 :     int         len = POSDATALEN(txt, wptr);
    2006           4 :     int         num = 0;
    2007           4 :     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
    2008             : 
    2009          24 :     while (len--)
    2010             :     {
    2011          16 :         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
    2012           8 :             num++;
    2013          16 :         ptr++;
    2014             :     }
    2015           4 :     return num;
    2016             : }
    2017             : 
    2018             : #define compareStatWord(a,e,t)                          \
    2019             :     tsCompareString((a)->lexeme, (a)->lenlexeme,      \
    2020             :                     STRPTR(t) + (e)->pos, (e)->len,       \
    2021             :                     false)
    2022             : 
    2023             : static void
    2024      230552 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
    2025             : {
    2026      230552 :     WordEntry  *we = ARRPTR(txt) + off;
    2027      230552 :     StatEntry  *node = stat->root,
    2028      230552 :                *pnode = NULL;
    2029             :     int         n,
    2030      230552 :                 res = 0;
    2031      230552 :     uint32      depth = 1;
    2032             : 
    2033      230552 :     if (stat->weight == 0)
    2034      115276 :         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
    2035             :     else
    2036      115276 :         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
    2037             : 
    2038      230552 :     if (n == 0)
    2039      115272 :         return;                 /* nothing to insert */
    2040             : 
    2041     1279492 :     while (node)
    2042             :     {
    2043     1159636 :         res = compareStatWord(node, we, txt);
    2044             : 
    2045     1159636 :         if (res == 0)
    2046             :         {
    2047      110704 :             break;
    2048             :         }
    2049             :         else
    2050             :         {
    2051     1048932 :             pnode = node;
    2052     1048932 :             node = (res < 0) ? node->left : node->right;
    2053             :         }
    2054     1048932 :         depth++;
    2055             :     }
    2056             : 
    2057      115280 :     if (depth > stat->maxdepth)
    2058          84 :         stat->maxdepth = depth;
    2059             : 
    2060      115280 :     if (node == NULL)
    2061             :     {
    2062        4576 :         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
    2063        4576 :         node->left = node->right = NULL;
    2064        4576 :         node->ndoc = 1;
    2065        4576 :         node->nentry = n;
    2066        4576 :         node->lenlexeme = we->len;
    2067        4576 :         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
    2068             : 
    2069        4576 :         if (pnode == NULL)
    2070             :         {
    2071           8 :             stat->root = node;
    2072             :         }
    2073             :         else
    2074             :         {
    2075        4568 :             if (res < 0)
    2076        2248 :                 pnode->left = node;
    2077             :             else
    2078        2320 :                 pnode->right = node;
    2079             :         }
    2080             : 
    2081             :     }
    2082             :     else
    2083             :     {
    2084      110704 :         node->ndoc++;
    2085      110704 :         node->nentry += n;
    2086             :     }
    2087             : }
    2088             : 
    2089             : static void
    2090      330768 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
    2091             :                     uint32 low, uint32 high, uint32 offset)
    2092             : {
    2093             :     uint32      pos;
    2094      330768 :     uint32      middle = (low + high) >> 1;
    2095             : 
    2096      330768 :     pos = (low + middle) >> 1;
    2097      330768 :     if (low != middle && pos >= offset && pos - offset < txt->size)
    2098      113624 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2099      330768 :     pos = (high + middle + 1) >> 1;
    2100      330768 :     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
    2101      112920 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2102             : 
    2103      330768 :     if (low != middle)
    2104      165384 :         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
    2105      330768 :     if (high != middle + 1)
    2106      161376 :         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
    2107      330768 : }
    2108             : 
    2109             : /*
    2110             :  * This is written like a custom aggregate function, because the
    2111             :  * original plan was to do just that. Unfortunately, an aggregate function
    2112             :  * can't return a set, so that plan was abandoned. If that limitation is
    2113             :  * lifted in the future, ts_stat could be a real aggregate function so that
    2114             :  * you could use it like this:
    2115             :  *
    2116             :  *   SELECT ts_stat(vector_column) FROM vector_table;
    2117             :  *
    2118             :  *  where vector_column is a tsvector-type column in vector_table.
    2119             :  */
    2120             : 
    2121             : static TSVectorStat *
    2122        4072 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
    2123             : {
    2124        4072 :     TSVector    txt = DatumGetTSVector(data);
    2125             :     uint32      i,
    2126        4072 :                 nbit = 0,
    2127             :                 offset;
    2128             : 
    2129        4072 :     if (stat == NULL)
    2130             :     {                           /* initialize on first call */
    2131           0 :         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2132           0 :         stat->maxdepth = 1;
    2133             :     }
    2134             : 
    2135             :     /* simple check of correctness */
    2136        4072 :     if (txt == NULL || txt->size == 0)
    2137             :     {
    2138          64 :         if (txt && txt != (TSVector) DatumGetPointer(data))
    2139          64 :             pfree(txt);
    2140          64 :         return stat;
    2141             :     }
    2142             : 
    2143        4008 :     i = txt->size - 1;
    2144       28488 :     for (; i > 0; i >>= 1)
    2145       24480 :         nbit++;
    2146             : 
    2147        4008 :     nbit = 1 << nbit;
    2148        4008 :     offset = (nbit - txt->size) / 2;
    2149             : 
    2150        4008 :     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
    2151        4008 :     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
    2152             : 
    2153        4008 :     return stat;
    2154             : }
    2155             : 
    2156             : static void
    2157           8 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
    2158             :                    TSVectorStat *stat)
    2159             : {
    2160             :     TupleDesc   tupdesc;
    2161             :     MemoryContext oldcontext;
    2162             :     StatEntry  *node;
    2163             : 
    2164           8 :     funcctx->user_fctx = (void *) stat;
    2165             : 
    2166           8 :     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    2167             : 
    2168           8 :     stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
    2169           8 :     stat->stackpos = 0;
    2170             : 
    2171           8 :     node = stat->root;
    2172             :     /* find leftmost value */
    2173           8 :     if (node == NULL)
    2174           0 :         stat->stack[stat->stackpos] = NULL;
    2175             :     else
    2176             :         for (;;)
    2177             :         {
    2178          56 :             stat->stack[stat->stackpos] = node;
    2179          32 :             if (node->left)
    2180             :             {
    2181          24 :                 stat->stackpos++;
    2182          24 :                 node = node->left;
    2183             :             }
    2184             :             else
    2185           8 :                 break;
    2186             :         }
    2187             :     Assert(stat->stackpos <= stat->maxdepth);
    2188             : 
    2189           8 :     tupdesc = CreateTemplateTupleDesc(3);
    2190           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
    2191             :                        TEXTOID, -1, 0);
    2192           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
    2193             :                        INT4OID, -1, 0);
    2194           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
    2195             :                        INT4OID, -1, 0);
    2196           8 :     funcctx->tuple_desc = BlessTupleDesc(tupdesc);
    2197           8 :     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    2198             : 
    2199           8 :     MemoryContextSwitchTo(oldcontext);
    2200           8 : }
    2201             : 
    2202             : static StatEntry *
    2203        9152 : walkStatEntryTree(TSVectorStat *stat)
    2204             : {
    2205        9152 :     StatEntry  *node = stat->stack[stat->stackpos];
    2206             : 
    2207        9152 :     if (node == NULL)
    2208           0 :         return NULL;
    2209             : 
    2210        9152 :     if (node->ndoc != 0)
    2211             :     {
    2212             :         /* return entry itself: we have already visited the left sublink */
    2213        2256 :         return node;
    2214             :     }
    2215        6896 :     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
    2216             :     {
    2217             :         /* go on right sublink */
    2218        2320 :         stat->stackpos++;
    2219        2320 :         node = node->right;
    2220             : 
    2221             :         /* find leftmost value */
    2222             :         for (;;)
    2223             :         {
    2224        6768 :             stat->stack[stat->stackpos] = node;
    2225        4544 :             if (node->left)
    2226             :             {
    2227        2224 :                 stat->stackpos++;
    2228        2224 :                 node = node->left;
    2229             :             }
    2230             :             else
    2231        2320 :                 break;
    2232             :         }
    2233        2320 :         Assert(stat->stackpos <= stat->maxdepth);
    2234             :     }
    2235             :     else
    2236             :     {
    2237             :         /* we already returned the left subtree, the node itself and the right subtree */
    2238        4576 :         if (stat->stackpos == 0)
    2239           8 :             return NULL;
    2240             : 
    2241        4568 :         stat->stackpos--;
    2242        4568 :         return walkStatEntryTree(stat);
    2243             :     }
    2244             : 
    2245        2320 :     return node;
    2246             : }
    2247             : 
    2248             : static Datum
    2249        4584 : ts_process_call(FuncCallContext *funcctx)
    2250             : {
    2251             :     TSVectorStat *st;
    2252             :     StatEntry  *entry;
    2253             : 
    2254        4584 :     st = (TSVectorStat *) funcctx->user_fctx;
    2255             : 
    2256        4584 :     entry = walkStatEntryTree(st);
    2257             : 
    2258        4584 :     if (entry != NULL)
    2259             :     {
    2260             :         Datum       result;
    2261             :         char       *values[3];
    2262             :         char        ndoc[16];
    2263             :         char        nentry[16];
    2264             :         HeapTuple   tuple;
    2265             : 
    2266        4576 :         values[0] = palloc(entry->lenlexeme + 1);
    2267        4576 :         memcpy(values[0], entry->lexeme, entry->lenlexeme);
    2268        4576 :         (values[0])[entry->lenlexeme] = '\0';
    2269        4576 :         sprintf(ndoc, "%d", entry->ndoc);
    2270        4576 :         values[1] = ndoc;
    2271        4576 :         sprintf(nentry, "%d", entry->nentry);
    2272        4576 :         values[2] = nentry;
    2273             : 
    2274        4576 :         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    2275        4576 :         result = HeapTupleGetDatum(tuple);
    2276             : 
    2277        4576 :         pfree(values[0]);
    2278             : 
    2279             :         /* mark entry as already visited */
    2280        4576 :         entry->ndoc = 0;
    2281             : 
    2282        4576 :         return result;
    2283             :     }
    2284             : 
    2285           8 :     return (Datum) 0;
    2286             : }
    2287             : 
    2288             : static TSVectorStat *
    2289           8 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
    2290             : {
    2291           8 :     char       *query = text_to_cstring(txt);
    2292             :     TSVectorStat *stat;
    2293             :     bool        isnull;
    2294             :     Portal      portal;
    2295             :     SPIPlanPtr  plan;
    2296             : 
    2297           8 :     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
    2298             :         /* internal error */
    2299           0 :         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
    2300             : 
    2301           8 :     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
    2302             :         /* internal error */
    2303           0 :         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
    2304             : 
    2305           8 :     SPI_cursor_fetch(portal, true, 100);
    2306             : 
    2307          16 :     if (SPI_tuptable == NULL ||
    2308          16 :         SPI_tuptable->tupdesc->natts != 1 ||
    2309           8 :         !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
    2310             :                            TSVECTOROID))
    2311           0 :         ereport(ERROR,
    2312             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2313             :                  errmsg("ts_stat query must return one tsvector column")));
    2314             : 
    2315           8 :     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2316           8 :     stat->maxdepth = 1;
    2317             : 
    2318           8 :     if (ws)
    2319             :     {
    2320             :         char       *buf;
    2321             : 
    2322           4 :         buf = VARDATA_ANY(ws);
    2323          16 :         while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
    2324             :         {
    2325           8 :             if (pg_mblen(buf) == 1)
    2326             :             {
    2327           8 :                 switch (*buf)
    2328             :                 {
    2329             :                     case 'A':
    2330             :                     case 'a':
    2331           4 :                         stat->weight |= 1 << 3;
    2332           4 :                         break;
    2333             :                     case 'B':
    2334             :                     case 'b':
    2335           4 :                         stat->weight |= 1 << 2;
    2336           4 :                         break;
    2337             :                     case 'C':
    2338             :                     case 'c':
    2339           0 :                         stat->weight |= 1 << 1;
    2340           0 :                         break;
    2341             :                     case 'D':
    2342             :                     case 'd':
    2343           0 :                         stat->weight |= 1;
    2344           0 :                         break;
    2345             :                     default:
    2346           0 :                         stat->weight |= 0;
    2347             :                 }
    2348             :             }
    2349           8 :             buf += pg_mblen(buf);
    2350             :         }
    2351             :     }
    2352             : 
    2353          64 :     while (SPI_processed > 0)
    2354             :     {
    2355             :         uint64      i;
    2356             : 
    2357        4120 :         for (i = 0; i < SPI_processed; i++)
    2358             :         {
    2359        4072 :             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
    2360             : 
    2361        4072 :             if (!isnull)
    2362        4072 :                 stat = ts_accum(persistentContext, stat, data);
    2363             :         }
    2364             : 
    2365          48 :         SPI_freetuptable(SPI_tuptable);
    2366          48 :         SPI_cursor_fetch(portal, true, 100);
    2367             :     }
    2368             : 
    2369           8 :     SPI_freetuptable(SPI_tuptable);
    2370           8 :     SPI_cursor_close(portal);
    2371           8 :     SPI_freeplan(plan);
    2372           8 :     pfree(query);
    2373             : 
    2374           8 :     return stat;
    2375             : }
    2376             : 
    2377             : Datum
    2378        4576 : ts_stat1(PG_FUNCTION_ARGS)
    2379             : {
    2380             :     FuncCallContext *funcctx;
    2381             :     Datum       result;
    2382             : 
    2383        4576 :     if (SRF_IS_FIRSTCALL())
    2384             :     {
    2385             :         TSVectorStat *stat;
    2386           4 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2387             : 
    2388           4 :         funcctx = SRF_FIRSTCALL_INIT();
    2389           4 :         SPI_connect();
    2390           4 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
    2391           4 :         PG_FREE_IF_COPY(txt, 0);
    2392           4 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2393           4 :         SPI_finish();
    2394             :     }
    2395             : 
    2396        4576 :     funcctx = SRF_PERCALL_SETUP();
    2397        4576 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2398        4572 :         SRF_RETURN_NEXT(funcctx, result);
    2399           4 :     SRF_RETURN_DONE(funcctx);
    2400             : }
    2401             : 
    2402             : Datum
    2403           8 : ts_stat2(PG_FUNCTION_ARGS)
    2404             : {
    2405             :     FuncCallContext *funcctx;
    2406             :     Datum       result;
    2407             : 
    2408           8 :     if (SRF_IS_FIRSTCALL())
    2409             :     {
    2410             :         TSVectorStat *stat;
    2411           4 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2412           4 :         text       *ws = PG_GETARG_TEXT_PP(1);
    2413             : 
    2414           4 :         funcctx = SRF_FIRSTCALL_INIT();
    2415           4 :         SPI_connect();
    2416           4 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
    2417           4 :         PG_FREE_IF_COPY(txt, 0);
    2418           4 :         PG_FREE_IF_COPY(ws, 1);
    2419           4 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2420           4 :         SPI_finish();
    2421             :     }
    2422             : 
    2423           8 :     funcctx = SRF_PERCALL_SETUP();
    2424           8 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2425           4 :         SRF_RETURN_NEXT(funcctx, result);
    2426           4 :     SRF_RETURN_DONE(funcctx);
    2427             : }
    2428             : 
    2429             : 
    2430             : /*
    2431             :  * Triggers for automatic update of a tsvector column from text column(s)
    2432             :  *
    2433             :  * Trigger arguments are either
    2434             :  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s), or
    2435             :  *      name of tsvector col, name of regconfig col, name(s) of text col(s)
    2436             :  * i.e., tsconfig can either be specified by name, or indirectly as the
    2437             :  * contents of a regconfig field in the row.  If the name is used, it must
    2438             :  * be explicitly schema-qualified.
    2439             :  */
    2440             : Datum
    2441          12 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
    2442             : {
    2443          12 :     return tsvector_update_trigger(fcinfo, false);
    2444             : }
    2445             : 
    2446             : Datum
    2447           0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
    2448             : {
    2449           0 :     return tsvector_update_trigger(fcinfo, true);
    2450             : }
    2451             : 
    2452             : static Datum
    2453          12 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    2454             : {
    2455             :     TriggerData *trigdata;
    2456             :     Trigger    *trigger;
    2457             :     Relation    rel;
    2458          12 :     HeapTuple   rettuple = NULL;
    2459             :     int         tsvector_attr_num,
    2460             :                 i;
    2461             :     ParsedText  prs;
    2462             :     Datum       datum;
    2463             :     bool        isnull;
    2464             :     text       *txt;
    2465             :     Oid         cfgId;
    2466             : 
    2467             :     /* Check call context */
    2468          12 :     if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
    2469           0 :         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
    2470             : 
    2471          12 :     trigdata = (TriggerData *) fcinfo->context;
    2472          12 :     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
    2473           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for row");
    2474          12 :     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
    2475           0 :         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
    2476             : 
    2477          12 :     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
    2478           8 :         rettuple = trigdata->tg_trigtuple;
    2479           4 :     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
    2480           4 :         rettuple = trigdata->tg_newtuple;
    2481             :     else
    2482           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
    2483             : 
    2484          12 :     trigger = trigdata->tg_trigger;
    2485          12 :     rel = trigdata->tg_relation;
    2486             : 
    2487          12 :     if (trigger->tgnargs < 3)
    2488           0 :         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
    2489             : 
    2490             :     /* Find the target tsvector column */
    2491          12 :     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
    2492          12 :     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
    2493           0 :         ereport(ERROR,
    2494             :                 (errcode(ERRCODE_UNDEFINED_COLUMN),
    2495             :                  errmsg("tsvector column \"%s\" does not exist",
    2496             :                         trigger->tgargs[0])));
    2497             :     /* This will effectively reject system columns, so no separate test: */
    2498          12 :     if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
    2499             :                            TSVECTOROID))
    2500           0 :         ereport(ERROR,
    2501             :                 (errcode(ERRCODE_DATATYPE_MISMATCH),
    2502             :                  errmsg("column \"%s\" is not of tsvector type",
    2503             :                         trigger->tgargs[0])));
    2504             : 
    2505             :     /* Find the configuration to use */
    2506          12 :     if (config_column)
    2507             :     {
    2508             :         int         config_attr_num;
    2509             : 
    2510           0 :         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
    2511           0 :         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
    2512           0 :             ereport(ERROR,
    2513             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2514             :                      errmsg("configuration column \"%s\" does not exist",
    2515             :                             trigger->tgargs[1])));
    2516           0 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
    2517             :                                REGCONFIGOID))
    2518           0 :             ereport(ERROR,
    2519             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2520             :                      errmsg("column \"%s\" is not of regconfig type",
    2521             :                             trigger->tgargs[1])));
    2522             : 
    2523           0 :         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
    2524           0 :         if (isnull)
    2525           0 :             ereport(ERROR,
    2526             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    2527             :                      errmsg("configuration column \"%s\" must not be null",
    2528             :                             trigger->tgargs[1])));
    2529           0 :         cfgId = DatumGetObjectId(datum);
    2530             :     }
    2531             :     else
    2532             :     {
    2533             :         List       *names;
    2534             : 
    2535          12 :         names = stringToQualifiedNameList(trigger->tgargs[1]);
    2536             :         /* require a schema so that results are not search path dependent */
    2537          12 :         if (list_length(names) < 2)
    2538           0 :             ereport(ERROR,
    2539             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2540             :                      errmsg("text search configuration name \"%s\" must be schema-qualified",
    2541             :                             trigger->tgargs[1])));
    2542          12 :         cfgId = get_ts_config_oid(names, false);
    2543             :     }
    2544             : 
    2545             :     /* initialize parse state */
    2546          12 :     prs.lenwords = 32;
    2547          12 :     prs.curwords = 0;
    2548          12 :     prs.pos = 0;
    2549          12 :     prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
    2550             : 
    2551             :     /* find all words in indexable column(s) */
    2552          24 :     for (i = 2; i < trigger->tgnargs; i++)
    2553             :     {
    2554             :         int         numattr;
    2555             : 
    2556          12 :         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
    2557          12 :         if (numattr == SPI_ERROR_NOATTRIBUTE)
    2558           0 :             ereport(ERROR,
    2559             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2560             :                      errmsg("column \"%s\" does not exist",
    2561             :                             trigger->tgargs[i])));
    2562          12 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
    2563           0 :             ereport(ERROR,
    2564             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2565             :                      errmsg("column \"%s\" is not of a character type",
    2566             :                             trigger->tgargs[i])));
    2567             : 
    2568          12 :         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
    2569          12 :         if (isnull)
    2570           4 :             continue;
    2571             : 
    2572           8 :         txt = DatumGetTextPP(datum);
    2573             : 
    2574           8 :         parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    2575             : 
    2576           8 :         if (txt != (text *) DatumGetPointer(datum))
    2577           0 :             pfree(txt);
    2578             :     }
    2579             : 
    2580             :     /* make tsvector value */
    2581          12 :     datum = TSVectorGetDatum(make_tsvector(&prs));
    2582          12 :     isnull = false;
    2583             : 
    2584             :     /* and insert it into tuple */
    2585          12 :     rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
    2586             :                                          1, &tsvector_attr_num,
    2587             :                                          &datum, &isnull);
    2588             : 
    2589          12 :     pfree(DatumGetPointer(datum));
    2590             : 
    2591          12 :     return PointerGetDatum(rettuple);
    2592             : }

Generated by: LCOV version 1.13