LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsquery.c (source / functions)
Test: PostgreSQL 18devel
Date: 2024-09-08 23:12:01
Coverage summary (Hit / Total / Coverage):
  Lines: 460 / 593 / 77.6 %
  Functions: 20 / 23 / 87.0 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * tsquery.c
       4             :  *    I/O functions for tsquery
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/tsquery.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "libpq/pqformat.h"
      18             : #include "miscadmin.h"
      19             : #include "nodes/miscnodes.h"
      20             : #include "tsearch/ts_locale.h"
      21             : #include "tsearch/ts_type.h"
      22             : #include "tsearch/ts_utils.h"
      23             : #include "utils/builtins.h"
      24             : #include "utils/memutils.h"
      25             : #include "utils/pg_crc.h"
      26             : #include "varatt.h"
      27             : 
      28             : /* FTS operator priorities, see ts_type.h */
      29             : const int   tsearch_op_priority[OP_COUNT] =
      30             : {
      31             :     4,                          /* OP_NOT */
      32             :     2,                          /* OP_AND */
      33             :     1,                          /* OP_OR */
      34             :     3                           /* OP_PHRASE */
      35             : };
      36             : 
      37             : /*
      38             :  * parser's states
      39             :  */
      40             : typedef enum
      41             : {
      42             :     WAITOPERAND = 1,
      43             :     WAITOPERATOR = 2,
      44             :     WAITFIRSTOPERAND = 3,
      45             : } ts_parserstate;
      46             : 
      47             : /*
      48             :  * token types for parsing
      49             :  */
      50             : typedef enum
      51             : {
      52             :     PT_END = 0,
      53             :     PT_ERR = 1,
      54             :     PT_VAL = 2,
      55             :     PT_OPR = 3,
      56             :     PT_OPEN = 4,
      57             :     PT_CLOSE = 5,
      58             : } ts_tokentype;
      59             : 
      60             : /*
      61             :  * get token from query string
      62             :  *
      63             :  * All arguments except "state" are output arguments.
      64             :  *
      65             :  * If return value is PT_OPR, then *operator is filled with an OP_* code
      66             :  * and *weight will contain a distance value in the case of a phrase operator.
      67             :  *
      68             :  * If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
      69             :  * are filled.
      70             :  *
      71             :  * If PT_ERR is returned then a soft error has occurred.  If state->escontext
      72             :  * isn't already filled then this should be reported as a generic parse error.
      73             :  */
      74             : typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
      75             :                                       int *lenval, char **strval,
      76             :                                       int16 *weight, bool *prefix);
      77             : 
      78             : struct TSQueryParserStateData
      79             : {
      80             :     /* Tokenizer used for parsing tsquery */
      81             :     ts_tokenizer gettoken;
      82             : 
      83             :     /* State of tokenizer function */
      84             :     char       *buffer;         /* entire string we are scanning */
      85             :     char       *buf;            /* current scan point */
      86             :     int         count;          /* nesting count, incremented by (,
      87             :                                  * decremented by ) */
      88             :     ts_parserstate state;
      89             : 
      90             :     /* polish (prefix) notation in list, filled in by push* functions */
      91             :     List       *polstr;
      92             : 
      93             :     /*
      94             :      * Strings from operands are collected in op. curop is a pointer to the
      95             :      * end of used space of op.
      96             :      */
      97             :     char       *op;
      98             :     char       *curop;
      99             :     int         lenop;          /* allocated size of op */
     100             :     int         sumlen;         /* used size of op */
     101             : 
     102             :     /* state for value's parser */
     103             :     TSVectorParseState valstate;
     104             : 
     105             :     /* context object for soft errors - must match valstate's escontext */
     106             :     Node       *escontext;
     107             : };
     108             : 
     109             : /*
     110             :  * subroutine to parse the modifiers part of a query, like ':AB*'
     111             :  * (currently the weight letters and the prefix flag).
     112             :  */
     113             : static char *
     114        7206 : get_modifiers(char *buf, int16 *weight, bool *prefix)
     115             : {
     116        7206 :     *weight = 0;
     117        7206 :     *prefix = false;
     118             : 
     119        7206 :     if (!t_iseq(buf, ':'))
     120        6570 :         return buf;
     121             : 
     122         636 :     buf++;
     123        1488 :     while (*buf && pg_mblen(buf) == 1)
     124             :     {
     125        1068 :         switch (*buf)
     126             :         {
     127         234 :             case 'a':
     128             :             case 'A':
     129         234 :                 *weight |= 1 << 3;
     130         234 :                 break;
     131          66 :             case 'b':
     132             :             case 'B':
     133          66 :                 *weight |= 1 << 2;
     134          66 :                 break;
     135         114 :             case 'c':
     136             :             case 'C':
     137         114 :                 *weight |= 1 << 1;
     138         114 :                 break;
     139         120 :             case 'd':
     140             :             case 'D':
     141         120 :                 *weight |= 1;
     142         120 :                 break;
     143         318 :             case '*':
     144         318 :                 *prefix = true;
     145         318 :                 break;
     146         216 :             default:
     147         216 :                 return buf;
     148             :         }
     149         852 :         buf++;
     150             :     }
     151             : 
     152         420 :     return buf;
     153             : }
     154             : 
     155             : /*
     156             :  * Parse phrase operator. The operator
     157             :  * may take the following forms:
     158             :  *
     159             :  *      a <N> b (distance is exactly N lexemes)
     160             :  *      a <-> b (default distance = 1)
     161             :  *
     162             :  * The buffer should begin with '<' char
     163             :  */
     164             : static bool
     165        9078 : parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
     166             : {
     167             :     enum
     168             :     {
     169             :         PHRASE_OPEN = 0,
     170             :         PHRASE_DIST,
     171             :         PHRASE_CLOSE,
     172             :         PHRASE_FINISH
     173        9078 :     }           state = PHRASE_OPEN;
     174        9078 :     char       *ptr = pstate->buf;
     175             :     char       *endptr;
     176        9078 :     long        l = 1;          /* default distance */
     177             : 
     178        9078 :     while (*ptr)
     179             :     {
     180       10984 :         switch (state)
     181             :         {
     182        5776 :             case PHRASE_OPEN:
     183        5776 :                 if (t_iseq(ptr, '<'))
     184             :                 {
     185        1740 :                     state = PHRASE_DIST;
     186        1740 :                     ptr++;
     187             :                 }
     188             :                 else
     189        4036 :                     return false;
     190        1740 :                 break;
     191             : 
     192        1740 :             case PHRASE_DIST:
     193        1740 :                 if (t_iseq(ptr, '-'))
     194             :                 {
     195        1446 :                     state = PHRASE_CLOSE;
     196        1446 :                     ptr++;
     197        1446 :                     continue;
     198             :                 }
     199             : 
     200         294 :                 if (!t_isdigit(ptr))
     201           0 :                     return false;
     202             : 
     203         294 :                 errno = 0;
     204         294 :                 l = strtol(ptr, &endptr, 10);
     205         294 :                 if (ptr == endptr)
     206           0 :                     return false;
     207         294 :                 else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
     208           6 :                     ereturn(pstate->escontext, false,
     209             :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     210             :                              errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
     211             :                                     MAXENTRYPOS)));
     212             :                 else
     213             :                 {
     214         288 :                     state = PHRASE_CLOSE;
     215         288 :                     ptr = endptr;
     216             :                 }
     217         288 :                 break;
     218             : 
     219        1734 :             case PHRASE_CLOSE:
     220        1734 :                 if (t_iseq(ptr, '>'))
     221             :                 {
     222        1734 :                     state = PHRASE_FINISH;
     223        1734 :                     ptr++;
     224             :                 }
     225             :                 else
     226           0 :                     return false;
     227        1734 :                 break;
     228             : 
     229        1734 :             case PHRASE_FINISH:
     230        1734 :                 *distance = (int16) l;
     231        1734 :                 pstate->buf = ptr;
     232        1734 :                 return true;
     233             :         }
     234       14286 :     }
     235             : 
     236        3302 :     return false;
     237             : }
     238             : 
     239             : /*
     240             :  * Parse the OR operator used in websearch_to_tsquery().  Returns true if we
     241             :  * believe that the "OR" literal could be an OR operator.
     242             :  */
     243             : static bool
     244        1398 : parse_or_operator(TSQueryParserState pstate)
     245             : {
     246        1398 :     char       *ptr = pstate->buf;
     247             : 
     248             :     /* it should begin with "OR" literal */
     249        1398 :     if (pg_strncasecmp(ptr, "or", 2) != 0)
     250        1248 :         return false;
     251             : 
     252         150 :     ptr += 2;
     253             : 
     254             :     /*
     255             :      * it must not be part of a word, and some operand has to follow it
     256             :      * somewhere later
     257             :      */
     258         150 :     if (*ptr == '\0')           /* no operand */
     259           6 :         return false;
     260             : 
     261             :     /* it shouldn't be a part of any word */
     262         144 :     if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr))
     263          24 :         return false;
     264             : 
     265             :     for (;;)
     266             :     {
     267         120 :         ptr += pg_mblen(ptr);
     268             : 
     269         120 :         if (*ptr == '\0')       /* got end of string without operand */
     270          12 :             return false;
     271             : 
     272             :         /*
     273             :          * We have found something that may be an operand, though possibly
     274             :          * not a correct one.  Either way, treat the OR literal as an operator
     275             :          * (with a possibly incorrect operand) rather than as a lexeme.
     276             :          */
     277         108 :         if (!t_isspace(ptr))
     278         108 :             break;
     279             :     }
     280             : 
     281         108 :     pstate->buf += 2;
     282         108 :     return true;
     283             : }
     284             : 
     285             : static ts_tokentype
     286       17490 : gettoken_query_standard(TSQueryParserState state, int8 *operator,
     287             :                         int *lenval, char **strval,
     288             :                         int16 *weight, bool *prefix)
     289             : {
     290       17490 :     *weight = 0;
     291       17490 :     *prefix = false;
     292             : 
     293             :     while (true)
     294             :     {
     295       23384 :         switch (state->state)
     296             :         {
     297       12154 :             case WAITFIRSTOPERAND:
     298             :             case WAITOPERAND:
     299       12154 :                 if (t_iseq(state->buf, '!'))
     300             :                 {
     301         930 :                     state->buf++;
     302         930 :                     state->state = WAITOPERAND;
     303         930 :                     *operator = OP_NOT;
     304         930 :                     return PT_OPR;
     305             :                 }
     306       11224 :                 else if (t_iseq(state->buf, '('))
     307             :                 {
     308        1062 :                     state->buf++;
     309        1062 :                     state->state = WAITOPERAND;
     310        1062 :                     state->count++;
     311        1062 :                     return PT_OPEN;
     312             :                 }
     313       10162 :                 else if (t_iseq(state->buf, ':'))
     314             :                 {
     315             :                     /* generic syntax error message is fine */
     316           0 :                     return PT_ERR;
     317             :                 }
     318       10162 :                 else if (!t_isspace(state->buf))
     319             :                 {
     320             :                     /*
     321             :                      * We rely on the tsvector parser to parse the value for
     322             :                      * us
     323             :                      */
     324        7230 :                     reset_tsvector_parser(state->valstate, state->buf);
     325        7230 :                     if (gettoken_tsvector(state->valstate, strval, lenval,
     326             :                                           NULL, NULL, &state->buf))
     327             :                     {
     328        7206 :                         state->buf = get_modifiers(state->buf, weight, prefix);
     329        7206 :                         state->state = WAITOPERATOR;
     330        7206 :                         return PT_VAL;
     331             :                     }
     332          24 :                     else if (SOFT_ERROR_OCCURRED(state->escontext))
     333             :                     {
     334             :                         /* gettoken_tsvector reported a soft error */
     335           0 :                         return PT_ERR;
     336             :                     }
     337          24 :                     else if (state->state == WAITFIRSTOPERAND)
     338             :                     {
     339          24 :                         return PT_END;
     340             :                     }
     341             :                     else
     342           0 :                         ereturn(state->escontext, PT_ERR,
     343             :                                 (errcode(ERRCODE_SYNTAX_ERROR),
     344             :                                  errmsg("no operand in tsquery: \"%s\"",
     345             :                                         state->buffer)));
     346             :                 }
     347        2932 :                 break;
     348             : 
     349       11230 :             case WAITOPERATOR:
     350       11230 :                 if (t_iseq(state->buf, '&'))
     351             :                 {
     352        1330 :                     state->buf++;
     353        1330 :                     state->state = WAITOPERAND;
     354        1330 :                     *operator = OP_AND;
     355        1330 :                     return PT_OPR;
     356             :                 }
     357        9900 :                 else if (t_iseq(state->buf, '|'))
     358             :                 {
     359         822 :                     state->buf++;
     360         822 :                     state->state = WAITOPERAND;
     361         822 :                     *operator = OP_OR;
     362         822 :                     return PT_OPR;
     363             :                 }
     364        9078 :                 else if (parse_phrase_operator(state, weight))
     365             :                 {
     366             :                     /* weight var is used as storage for distance */
     367        1734 :                     state->state = WAITOPERAND;
     368        1734 :                     *operator = OP_PHRASE;
     369        1734 :                     return PT_OPR;
     370             :                 }
     371        7344 :                 else if (SOFT_ERROR_OCCURRED(state->escontext))
     372             :                 {
     373             :                     /* parse_phrase_operator reported a soft error */
     374           6 :                     return PT_ERR;
     375             :                 }
     376        7338 :                 else if (t_iseq(state->buf, ')'))
     377             :                 {
     378        1062 :                     state->buf++;
     379        1062 :                     state->count--;
     380        1062 :                     return (state->count < 0) ? PT_ERR : PT_CLOSE;
     381             :                 }
     382        6276 :                 else if (*state->buf == '\0')
     383             :                 {
     384        3302 :                     return (state->count) ? PT_ERR : PT_END;
     385             :                 }
     386        2974 :                 else if (!t_isspace(state->buf))
     387             :                 {
     388          12 :                     return PT_ERR;
     389             :                 }
     390        2962 :                 break;
     391             :         }
     392             : 
     393        5894 :         state->buf += pg_mblen(state->buf);
     394             :     }
     395             : }
     396             : 
     397             : static ts_tokentype
     398        2262 : gettoken_query_websearch(TSQueryParserState state, int8 *operator,
     399             :                          int *lenval, char **strval,
     400             :                          int16 *weight, bool *prefix)
     401             : {
     402        2262 :     *weight = 0;
     403        2262 :     *prefix = false;
     404             : 
     405             :     while (true)
     406             :     {
     407        3156 :         switch (state->state)
     408             :         {
     409        1344 :             case WAITFIRSTOPERAND:
     410             :             case WAITOPERAND:
     411        1344 :                 if (t_iseq(state->buf, '-'))
     412             :                 {
     413          66 :                     state->buf++;
     414          66 :                     state->state = WAITOPERAND;
     415             : 
     416          66 :                     *operator = OP_NOT;
     417          66 :                     return PT_OPR;
     418             :                 }
     419        1278 :                 else if (t_iseq(state->buf, '"'))
     420             :                 {
     421             :                     /* Everything in quotes is processed as a single token */
     422             : 
     423             :                     /* skip opening quote */
     424         192 :                     state->buf++;
     425         192 :                     *strval = state->buf;
     426             : 
     427             :                     /* iterate to the closing quote or end of the string */
     428        1740 :                     while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
     429        1548 :                         state->buf++;
     430         192 :                     *lenval = state->buf - *strval;
     431             : 
     432             :                     /* skip closing quote if not end of the string */
     433         192 :                     if (*state->buf != '\0')
     434         168 :                         state->buf++;
     435             : 
     436         192 :                     state->state = WAITOPERATOR;
     437         192 :                     state->count++;
     438         192 :                     return PT_VAL;
     439             :                 }
     440        1086 :                 else if (ISOPERATOR(state->buf))
     441             :                 {
     442             :                     /* ignore, else gettoken_tsvector() will raise an error */
     443         102 :                     state->buf++;
     444         102 :                     state->state = WAITOPERAND;
     445         102 :                     continue;
     446             :                 }
     447         984 :                 else if (!t_isspace(state->buf))
     448             :                 {
     449             :                     /*
     450             :                      * We rely on the tsvector parser to parse the value for
     451             :                      * us
     452             :                      */
     453         906 :                     reset_tsvector_parser(state->valstate, state->buf);
     454         906 :                     if (gettoken_tsvector(state->valstate, strval, lenval,
     455             :                                           NULL, NULL, &state->buf))
     456             :                     {
     457         906 :                         state->state = WAITOPERATOR;
     458         906 :                         return PT_VAL;
     459             :                     }
     460           0 :                     else if (SOFT_ERROR_OCCURRED(state->escontext))
     461             :                     {
     462             :                         /* gettoken_tsvector reported a soft error */
     463           0 :                         return PT_ERR;
     464             :                     }
     465           0 :                     else if (state->state == WAITFIRSTOPERAND)
     466             :                     {
     467           0 :                         return PT_END;
     468             :                     }
     469             :                     else
     470             :                     {
     471             :                         /* finally, we have to provide an operand */
     472           0 :                         pushStop(state);
     473           0 :                         return PT_END;
     474             :                     }
     475             :                 }
     476          78 :                 break;
     477             : 
     478        1812 :             case WAITOPERATOR:
     479        1812 :                 if (*state->buf == '\0')
     480             :                 {
     481         414 :                     return PT_END;
     482             :                 }
     483        1398 :                 else if (parse_or_operator(state))
     484             :                 {
     485         108 :                     state->state = WAITOPERAND;
     486         108 :                     *operator = OP_OR;
     487         108 :                     return PT_OPR;
     488             :                 }
     489        1290 :                 else if (ISOPERATOR(state->buf))
     490             :                 {
     491             :                     /* ignore other operators in this state too */
     492         114 :                     state->buf++;
     493         114 :                     continue;
     494             :                 }
     495        1176 :                 else if (!t_isspace(state->buf))
     496             :                 {
     497             :                     /* insert implicit AND between operands */
     498         576 :                     state->state = WAITOPERAND;
     499         576 :                     *operator = OP_AND;
     500         576 :                     return PT_OPR;
     501             :                 }
     502         600 :                 break;
     503             :         }
     504             : 
     505         678 :         state->buf += pg_mblen(state->buf);
     506             :     }
     507             : }
     508             : 
     509             : static ts_tokentype
     510         216 : gettoken_query_plain(TSQueryParserState state, int8 *operator,
     511             :                      int *lenval, char **strval,
     512             :                      int16 *weight, bool *prefix)
     513             : {
     514         216 :     *weight = 0;
     515         216 :     *prefix = false;
     516             : 
     517         216 :     if (*state->buf == '\0')
     518         108 :         return PT_END;
     519             : 
     520         108 :     *strval = state->buf;
     521         108 :     *lenval = strlen(state->buf);
     522         108 :     state->buf += *lenval;
     523         108 :     state->count++;
     524         108 :     return PT_VAL;
     525             : }
     526             : 
     527             : /*
     528             :  * Push an operator to state->polstr
     529             :  */
     530             : void
     531        6238 : pushOperator(TSQueryParserState state, int8 oper, int16 distance)
     532             : {
     533             :     QueryOperator *tmp;
     534             : 
     535             :     Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
     536             : 
     537        6238 :     tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
     538        6238 :     tmp->type = QI_OPR;
     539        6238 :     tmp->oper = oper;
     540        6238 :     tmp->distance = (oper == OP_PHRASE) ? distance : 0;
     541             :     /* left is filled in later with findoprnd */
     542             : 
     543        6238 :     state->polstr = lcons(tmp, state->polstr);
     544        6238 : }
     545             : 
     546             : static void
     547        8418 : pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
     548             : {
     549             :     QueryOperand *tmp;
     550             : 
     551        8418 :     if (distance >= MAXSTRPOS)
     552           0 :         ereturn(state->escontext,,
     553             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     554             :                  errmsg("value is too big in tsquery: \"%s\"",
     555             :                         state->buffer)));
     556        8418 :     if (lenval >= MAXSTRLEN)
     557           0 :         ereturn(state->escontext,,
     558             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     559             :                  errmsg("operand is too long in tsquery: \"%s\"",
     560             :                         state->buffer)));
     561             : 
     562        8418 :     tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
     563        8418 :     tmp->type = QI_VAL;
     564        8418 :     tmp->weight = weight;
     565        8418 :     tmp->prefix = prefix;
     566        8418 :     tmp->valcrc = (int32) valcrc;
     567        8418 :     tmp->length = lenval;
     568        8418 :     tmp->distance = distance;
     569             : 
     570        8418 :     state->polstr = lcons(tmp, state->polstr);
     571             : }
     572             : 
     573             : /*
     574             :  * Push an operand to state->polstr.
     575             :  *
     576             :  * strval must point to a string equal to state->curop. lenval is the length
     577             :  * of the string.
     578             :  */
     579             : void
     580        8418 : pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
     581             : {
     582             :     pg_crc32    valcrc;
     583             : 
     584        8418 :     if (lenval >= MAXSTRLEN)
     585           0 :         ereturn(state->escontext,,
     586             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     587             :                  errmsg("word is too long in tsquery: \"%s\"",
     588             :                         state->buffer)));
     589             : 
     590        8418 :     INIT_LEGACY_CRC32(valcrc);
     591       29712 :     COMP_LEGACY_CRC32(valcrc, strval, lenval);
     592        8418 :     FIN_LEGACY_CRC32(valcrc);
     593        8418 :     pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
     594             : 
     595             :     /* append the value string to state.op, enlarging buffer if needed first */
     596        8418 :     while (state->curop - state->op + lenval + 1 >= state->lenop)
     597             :     {
     598           0 :         int         used = state->curop - state->op;
     599             : 
     600           0 :         state->lenop *= 2;
     601           0 :         state->op = (char *) repalloc(state->op, state->lenop);
     602           0 :         state->curop = state->op + used;
     603             :     }
     604        8418 :     memcpy(state->curop, strval, lenval);
     605        8418 :     state->curop += lenval;
     606        8418 :     *(state->curop) = '\0';
     607        8418 :     state->curop++;
     608        8418 :     state->sumlen += lenval + 1 /* \0 */ ;
     609             : }
     610             : 
     611             : 
     612             : /*
     613             :  * Push a stopword placeholder to state->polstr
     614             :  */
     615             : void
     616         666 : pushStop(TSQueryParserState state)
     617             : {
     618             :     QueryOperand *tmp;
     619             : 
     620         666 :     tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
     621         666 :     tmp->type = QI_VALSTOP;
     622             : 
     623         666 :     state->polstr = lcons(tmp, state->polstr);
     624         666 : }
     625             : 
     626             : 
     627             : #define STACKDEPTH  32
     628             : 
     629             : typedef struct OperatorElement
     630             : {
     631             :     int8        op;
     632             :     int16       distance;
     633             : } OperatorElement;
     634             : 
     635             : static void
     636        5566 : pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
     637             : {
     638        5566 :     if (*lenstack == STACKDEPTH)    /* internal error */
     639           0 :         elog(ERROR, "tsquery stack too small");
     640             : 
     641        5566 :     stack[*lenstack].op = op;
     642        5566 :     stack[*lenstack].distance = distance;
     643             : 
     644        5566 :     (*lenstack)++;
     645        5566 : }
     646             : 
     647             : static void
     648       10476 : cleanOpStack(TSQueryParserState state,
     649             :              OperatorElement *stack, int *lenstack, int8 op)
     650             : {
     651       10476 :     int         opPriority = OP_PRIORITY(op);
     652             : 
     653       16042 :     while (*lenstack)
     654             :     {
     655             :         /* NOT is right-associative, unlike the other operators */
     656        6052 :         if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
     657         318 :             (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
     658             :             break;
     659             : 
     660        5566 :         (*lenstack)--;
     661        5566 :         pushOperator(state, stack[*lenstack].op,
     662        5566 :                      stack[*lenstack].distance);
     663             :     }
     664       10476 : }
     665             : 
     666             : /*
     667             :  * Make polish (prefix) notation of query.
     668             :  *
     669             :  * See parse_tsquery for explanation of pushval.
     670             :  */
     671             : static void
     672        4928 : makepol(TSQueryParserState state,
     673             :         PushFunction pushval,
     674             :         Datum opaque)
     675             : {
     676        4928 :     int8        operator = 0;
     677             :     ts_tokentype type;
     678        4928 :     int         lenval = 0;
     679        4928 :     char       *strval = NULL;
     680             :     OperatorElement opstack[STACKDEPTH];
     681        4928 :     int         lenstack = 0;
     682        4928 :     int16       weight = 0;
     683             :     bool        prefix;
     684             : 
     685             :     /* since this function recurses, it could be driven to stack overflow */
     686        4928 :     check_stack_depth();
     687             : 
     688       19968 :     while ((type = state->gettoken(state, &operator,
     689             :                                    &lenval, &strval,
     690             :                                    &weight, &prefix)) != PT_END)
     691             :     {
     692       16120 :         switch (type)
     693             :         {
     694        8412 :             case PT_VAL:
     695        8412 :                 pushval(opaque, state, strval, lenval, weight, prefix);
     696        8412 :                 break;
     697        5566 :             case PT_OPR:
     698        5566 :                 cleanOpStack(state, opstack, &lenstack, operator);
     699        5566 :                 pushOpStack(opstack, &lenstack, operator, weight);
     700        5566 :                 break;
     701        1062 :             case PT_OPEN:
     702        1062 :                 makepol(state, pushval, opaque);
     703        1062 :                 break;
     704        1062 :             case PT_CLOSE:
     705        1062 :                 cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
     706        1080 :                 return;
     707          18 :             case PT_ERR:
     708             :             default:
     709             :                 /* don't overwrite a soft error saved by gettoken function */
     710          18 :                 if (!SOFT_ERROR_OCCURRED(state->escontext))
     711          12 :                     errsave(state->escontext,
     712             :                             (errcode(ERRCODE_SYNTAX_ERROR),
     713             :                              errmsg("syntax error in tsquery: \"%s\"",
     714             :                                     state->buffer)));
     715          18 :                 return;
     716             :         }
     717             :         /* detect soft error in pushval or recursion */
     718       15040 :         if (SOFT_ERROR_OCCURRED(state->escontext))
     719           0 :             return;
     720             :     }
     721             : 
     722        3848 :     cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
     723             : }
     724             : 
     725             : static void
     726       15304 : findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
     727             : {
     728             :     /* since this function recurses, it could be driven to stack overflow. */
     729       15304 :     check_stack_depth();
     730             : 
     731       15304 :     if (*pos >= nnodes)
     732           0 :         elog(ERROR, "malformed tsquery: operand not found");
     733             : 
     734       15304 :     if (ptr[*pos].type == QI_VAL)
     735             :     {
     736        8400 :         (*pos)++;
     737             :     }
     738        6904 :     else if (ptr[*pos].type == QI_VALSTOP)
     739             :     {
     740         666 :         *needcleanup = true;    /* we'll have to remove stop words */
     741         666 :         (*pos)++;
     742             :     }
     743             :     else
     744             :     {
     745             :         Assert(ptr[*pos].type == QI_OPR);
     746             : 
     747        6238 :         if (ptr[*pos].qoperator.oper == OP_NOT)
     748             :         {
     749         996 :             ptr[*pos].qoperator.left = 1;   /* fixed offset */
     750         996 :             (*pos)++;
     751             : 
     752             :             /* process the only argument */
     753         996 :             findoprnd_recurse(ptr, pos, nnodes, needcleanup);
     754             :         }
     755             :         else
     756             :         {
     757        5242 :             QueryOperator *curitem = &ptr[*pos].qoperator;
     758        5242 :             int         tmp = *pos; /* save current position */
     759             : 
     760             :             Assert(curitem->oper == OP_AND ||
     761             :                    curitem->oper == OP_OR ||
     762             :                    curitem->oper == OP_PHRASE);
     763             : 
     764        5242 :             (*pos)++;
     765             : 
     766             :             /* process RIGHT argument */
     767        5242 :             findoprnd_recurse(ptr, pos, nnodes, needcleanup);
     768             : 
     769        5242 :             curitem->left = *pos - tmp; /* set LEFT arg's offset */
     770             : 
     771             :             /* process LEFT argument */
     772        5242 :             findoprnd_recurse(ptr, pos, nnodes, needcleanup);
     773             :         }
     774             :     }
     775       15304 : }
     776             : 
     777             : 
     778             : /*
     779             :  * Fill in the left-fields previously left unfilled.
     780             :  * The input QueryItems must be in polish (prefix) notation.
     781             :  * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
     782             :  */
     783             : static void
     784        3824 : findoprnd(QueryItem *ptr, int size, bool *needcleanup)
     785             : {
     786             :     uint32      pos;
     787             : 
     788        3824 :     *needcleanup = false;
     789        3824 :     pos = 0;
     790        3824 :     findoprnd_recurse(ptr, &pos, size, needcleanup);
     791             : 
     792        3824 :     if (pos != size)
     793           0 :         elog(ERROR, "malformed tsquery: extra nodes");
     794        3824 : }
     795             : 
     796             : 
     797             : /*
     798             :  * Parse the tsquery stored in "buf".
     799             :  *
     800             :  * Each value (operand) in the query is passed to pushval. pushval can
     801             :  * transform the simple value to an arbitrarily complex expression using
     802             :  * pushValue and pushOperator. It must push a single value with pushValue,
     803             :  * a complete expression with all operands, or a stopword placeholder
     804             :  * with pushStop, otherwise the prefix notation representation will be broken,
     805             :  * having an operator with no operand.
     806             :  *
     807             :  * opaque is passed on to pushval as is, pushval can use it to store its
     808             :  * private state.
     809             :  *
     810             :  * The pushval function can record soft errors via escontext.
     811             :  * Callers must check SOFT_ERROR_OCCURRED to detect that.
     812             :  *
     813             :  * A bitmask of flags (see ts_utils.h) and an error context object
     814             :  * can be provided as well.  If a soft error occurs, NULL is returned.
     815             :  */
     816             : TSQuery
     817        3866 : parse_tsquery(char *buf,
     818             :               PushFunction pushval,
     819             :               Datum opaque,
     820             :               int flags,
     821             :               Node *escontext)
     822             : {
     823             :     struct TSQueryParserStateData state;
     824             :     int         i;
     825             :     TSQuery     query;
     826             :     int         commonlen;
     827             :     QueryItem  *ptr;
     828             :     ListCell   *cell;
     829             :     bool        noisy;
     830             :     bool        needcleanup;
     831        3866 :     int         tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
     832             : 
     833             :     /* plain should not be used with web */
     834             :     Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
     835             : 
     836             :     /* select suitable tokenizer */
     837        3866 :     if (flags & P_TSQ_PLAIN)
     838         108 :         state.gettoken = gettoken_query_plain;
     839        3758 :     else if (flags & P_TSQ_WEB)
     840             :     {
     841         414 :         state.gettoken = gettoken_query_websearch;
     842         414 :         tsv_flags |= P_TSV_IS_WEB;
     843             :     }
     844             :     else
     845        3344 :         state.gettoken = gettoken_query_standard;
     846             : 
     847             :     /* emit nuisance NOTICEs only if not doing soft errors */
     848        3866 :     noisy = !(escontext && IsA(escontext, ErrorSaveContext));
     849             : 
     850             :     /* init state */
     851        3866 :     state.buffer = buf;
     852        3866 :     state.buf = buf;
     853        3866 :     state.count = 0;
     854        3866 :     state.state = WAITFIRSTOPERAND;
     855        3866 :     state.polstr = NIL;
     856        3866 :     state.escontext = escontext;
     857             : 
     858             :     /* init value parser's state */
     859        3866 :     state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
     860             : 
     861             :     /* init list of operand */
     862        3866 :     state.sumlen = 0;
     863        3866 :     state.lenop = 64;
     864        3866 :     state.curop = state.op = (char *) palloc(state.lenop);
     865        3866 :     *(state.curop) = '\0';
     866             : 
     867             :     /* parse query & make polish notation (postfix, but in reverse order) */
     868        3866 :     makepol(&state, pushval, opaque);
     869             : 
     870        3866 :     close_tsvector_parser(state.valstate);
     871             : 
     872        3866 :     if (SOFT_ERROR_OCCURRED(escontext))
     873          18 :         return NULL;
     874             : 
     875        3848 :     if (state.polstr == NIL)
     876             :     {
     877          24 :         if (noisy)
     878          24 :             ereport(NOTICE,
     879             :                     (errmsg("text-search query doesn't contain lexemes: \"%s\"",
     880             :                             state.buffer)));
     881          24 :         query = (TSQuery) palloc(HDRSIZETQ);
     882          24 :         SET_VARSIZE(query, HDRSIZETQ);
     883          24 :         query->size = 0;
     884          24 :         return query;
     885             :     }
     886             : 
     887        3824 :     if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
     888           0 :         ereturn(escontext, NULL,
     889             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     890             :                  errmsg("tsquery is too large")));
     891        3824 :     commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
     892             : 
     893             :     /* Pack the QueryItems in the final TSQuery struct to return to caller */
     894        3824 :     query = (TSQuery) palloc0(commonlen);
     895        3824 :     SET_VARSIZE(query, commonlen);
     896        3824 :     query->size = list_length(state.polstr);
     897        3824 :     ptr = GETQUERY(query);
     898             : 
     899             :     /* Copy QueryItems to TSQuery */
     900        3824 :     i = 0;
     901       19128 :     foreach(cell, state.polstr)
     902             :     {
     903       15304 :         QueryItem  *item = (QueryItem *) lfirst(cell);
     904             : 
     905       15304 :         switch (item->type)
     906             :         {
     907        8400 :             case QI_VAL:
     908        8400 :                 memcpy(&ptr[i], item, sizeof(QueryOperand));
     909        8400 :                 break;
     910         666 :             case QI_VALSTOP:
     911         666 :                 ptr[i].type = QI_VALSTOP;
     912         666 :                 break;
     913        6238 :             case QI_OPR:
     914        6238 :                 memcpy(&ptr[i], item, sizeof(QueryOperator));
     915        6238 :                 break;
     916           0 :             default:
     917           0 :                 elog(ERROR, "unrecognized QueryItem type: %d", item->type);
     918             :         }
     919       15304 :         i++;
     920             :     }
     921             : 
     922             :     /* Copy all the operand strings to TSQuery */
     923        3824 :     memcpy(GETOPERAND(query), state.op, state.sumlen);
     924        3824 :     pfree(state.op);
     925             : 
     926             :     /*
     927             :      * Set left operand pointers for every operator.  While we're at it,
     928             :      * detect whether there are any QI_VALSTOP nodes.
     929             :      */
     930        3824 :     findoprnd(ptr, query->size, &needcleanup);
     931             : 
     932             :     /*
     933             :      * If there are QI_VALSTOP nodes, delete them and simplify the tree.
     934             :      */
     935        3824 :     if (needcleanup)
     936         432 :         query = cleanup_tsquery_stopwords(query, noisy);
     937             : 
     938        3824 :     return query;
     939             : }
     940             : 
     941             : static void
     942        5300 : pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
     943             :              int16 weight, bool prefix)
     944             : {
     945        5300 :     pushValue(state, strval, lenval, weight, prefix);
     946        5300 : }
     947             : 
     948             : /*
     949             :  * input function: operands are pushed as-is, without morphology
     950             :  */
     951             : Datum
     952        2590 : tsqueryin(PG_FUNCTION_ARGS)
     953             : {
     954        2590 :     char       *in = PG_GETARG_CSTRING(0);
     955        2590 :     Node       *escontext = fcinfo->context;
     956             : 
     957        2590 :     PG_RETURN_TSQUERY(parse_tsquery(in,
     958             :                                     pushval_asis,
     959             :                                     PointerGetDatum(NULL),
     960             :                                     0,
     961             :                                     escontext));
     962             : }
     963             : 
     964             : /*
     965             :  * out function
     966             :  */
     967             : typedef struct
     968             : {
     969             :     QueryItem  *curpol;
     970             :     char       *buf;
     971             :     char       *cur;
     972             :     char       *op;
     973             :     int         buflen;
     974             : } INFIX;
     975             : 
     976             : /* Makes sure inf->buf is large enough for adding 'addsize' bytes */
     977             : #define RESIZEBUF(inf, addsize) \
     978             : while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
     979             : { \
     980             :     int len = (inf)->cur - (inf)->buf; \
     981             :     (inf)->buflen *= 2; \
     982             :     (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
     983             :     (inf)->cur = (inf)->buf + len; \
     984             : }
     985             : 
     986             : /*
     987             :  * recursively traverse the tree and
     988             :  * print it in infix (human-readable) form
     989             :  */
     990             : static void
     991        7288 : infix(INFIX *in, int parentPriority, bool rightPhraseOp)
     992             : {
     993             :     /* since this function recurses, it could be driven to stack overflow. */
     994        7288 :     check_stack_depth();
     995             : 
     996        7288 :     if (in->curpol->type == QI_VAL)
     997             :     {
     998        4216 :         QueryOperand *curpol = &in->curpol->qoperand;
     999        4216 :         char       *op = in->op + curpol->distance;
    1000             :         int         clen;
    1001             : 
    1002        6882 :         RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
    1003        4216 :         *(in->cur) = '\'';
    1004        4216 :         in->cur++;
    1005       16364 :         while (*op)
    1006             :         {
    1007       12148 :             if (t_iseq(op, '\''))
    1008             :             {
    1009          12 :                 *(in->cur) = '\'';
    1010          12 :                 in->cur++;
    1011             :             }
    1012       12136 :             else if (t_iseq(op, '\\'))
    1013             :             {
    1014           6 :                 *(in->cur) = '\\';
    1015           6 :                 in->cur++;
    1016             :             }
    1017       12148 :             COPYCHAR(in->cur, op);
    1018             : 
    1019       12148 :             clen = pg_mblen(op);
    1020       12148 :             op += clen;
    1021       12148 :             in->cur += clen;
    1022             :         }
    1023        4216 :         *(in->cur) = '\'';
    1024        4216 :         in->cur++;
    1025        4216 :         if (curpol->weight || curpol->prefix)
    1026             :         {
    1027         174 :             *(in->cur) = ':';
    1028         174 :             in->cur++;
    1029         174 :             if (curpol->prefix)
    1030             :             {
    1031          24 :                 *(in->cur) = '*';
    1032          24 :                 in->cur++;
    1033             :             }
    1034         174 :             if (curpol->weight & (1 << 3))
    1035             :             {
    1036          60 :                 *(in->cur) = 'A';
    1037          60 :                 in->cur++;
    1038             :             }
    1039         174 :             if (curpol->weight & (1 << 2))
    1040             :             {
    1041          96 :                 *(in->cur) = 'B';
    1042          96 :                 in->cur++;
    1043             :             }
    1044         174 :             if (curpol->weight & (1 << 1))
    1045             :             {
    1046          18 :                 *(in->cur) = 'C';
    1047          18 :                 in->cur++;
    1048             :             }
    1049         174 :             if (curpol->weight & 1)
    1050             :             {
    1051           6 :                 *(in->cur) = 'D';
    1052           6 :                 in->cur++;
    1053             :             }
    1054             :         }
    1055        4216 :         *(in->cur) = '\0';
    1056        4216 :         in->curpol++;
    1057             :     }
    1058        3072 :     else if (in->curpol->qoperator.oper == OP_NOT)
    1059             :     {
    1060         372 :         int         priority = QO_PRIORITY(in->curpol);
    1061             : 
    1062         372 :         if (priority < parentPriority)
    1063             :         {
    1064           0 :             RESIZEBUF(in, 2);
    1065           0 :             sprintf(in->cur, "( ");
    1066           0 :             in->cur = strchr(in->cur, '\0');
    1067             :         }
    1068         372 :         RESIZEBUF(in, 1);
    1069         372 :         *(in->cur) = '!';
    1070         372 :         in->cur++;
    1071         372 :         *(in->cur) = '\0';
    1072         372 :         in->curpol++;
    1073             : 
    1074         372 :         infix(in, priority, false);
    1075         372 :         if (priority < parentPriority)
    1076             :         {
    1077           0 :             RESIZEBUF(in, 2);
    1078           0 :             sprintf(in->cur, " )");
    1079           0 :             in->cur = strchr(in->cur, '\0');
    1080             :         }
    1081             :     }
    1082             :     else
    1083             :     {
    1084        2700 :         int8        op = in->curpol->qoperator.oper;
    1085        2700 :         int         priority = QO_PRIORITY(in->curpol);
    1086        2700 :         int16       distance = in->curpol->qoperator.distance;
    1087             :         INFIX       nrm;
    1088        2700 :         bool        needParenthesis = false;
    1089             : 
    1090        2700 :         in->curpol++;
    1091        2700 :         if (priority < parentPriority ||
    1092             :         /* phrase operator depends on order */
    1093         720 :             (op == OP_PHRASE && rightPhraseOp))
    1094             :         {
    1095         332 :             needParenthesis = true;
    1096         332 :             RESIZEBUF(in, 2);
    1097         332 :             sprintf(in->cur, "( ");
    1098         332 :             in->cur = strchr(in->cur, '\0');
    1099             :         }
    1100             : 
    1101        2700 :         nrm.curpol = in->curpol;
    1102        2700 :         nrm.op = in->op;
    1103        2700 :         nrm.buflen = 16;
    1104        2700 :         nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
    1105             : 
    1106             :         /* get right operand */
    1107        2700 :         infix(&nrm, priority, (op == OP_PHRASE));
    1108             : 
    1109             :         /* get & print left operand */
    1110        2700 :         in->curpol = nrm.curpol;
    1111        2700 :         infix(in, priority, false);
    1112             : 
    1113             :         /* print operator & right operand */
    1114        3680 :         RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
    1115        2700 :         switch (op)
    1116             :         {
    1117         732 :             case OP_OR:
    1118         732 :                 sprintf(in->cur, " | %s", nrm.buf);
    1119         732 :                 break;
    1120        1236 :             case OP_AND:
    1121        1236 :                 sprintf(in->cur, " & %s", nrm.buf);
    1122        1236 :                 break;
    1123         732 :             case OP_PHRASE:
    1124         732 :                 if (distance != 1)
    1125         174 :                     sprintf(in->cur, " <%d> %s", distance, nrm.buf);
    1126             :                 else
    1127         558 :                     sprintf(in->cur, " <-> %s", nrm.buf);
    1128         732 :                 break;
    1129           0 :             default:
    1130             :                 /* OP_NOT is handled in above if-branch */
    1131           0 :                 elog(ERROR, "unrecognized operator type: %d", op);
    1132             :         }
    1133        2700 :         in->cur = strchr(in->cur, '\0');
    1134        2700 :         pfree(nrm.buf);
    1135             : 
    1136        2700 :         if (needParenthesis)
    1137             :         {
    1138         332 :             RESIZEBUF(in, 2);
    1139         332 :             sprintf(in->cur, " )");
    1140         332 :             in->cur = strchr(in->cur, '\0');
    1141             :         }
    1142             :     }
    1143        7288 : }
    1144             : 
    1145             : Datum
    1146        1546 : tsqueryout(PG_FUNCTION_ARGS)
    1147             : {
    1148        1546 :     TSQuery     query = PG_GETARG_TSQUERY(0);
    1149             :     INFIX       nrm;
    1150             : 
    1151        1546 :     if (query->size == 0)
    1152             :     {
    1153          30 :         char       *b = palloc(1);
    1154             : 
    1155          30 :         *b = '\0';
    1156          30 :         PG_RETURN_POINTER(b);
    1157             :     }
    1158        1516 :     nrm.curpol = GETQUERY(query);
    1159        1516 :     nrm.buflen = 32;
    1160        1516 :     nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
    1161        1516 :     *(nrm.cur) = '\0';
    1162        1516 :     nrm.op = GETOPERAND(query);
    1163        1516 :     infix(&nrm, -1 /* lowest priority */ , false);
    1164             : 
    1165        1516 :     PG_FREE_IF_COPY(query, 0);
    1166        1516 :     PG_RETURN_CSTRING(nrm.buf);
    1167             : }
    1168             : 
    1169             : /*
    1170             :  * Binary Input / Output functions. The binary format is as follows:
    1171             :  *
    1172             :  * uint32    number of operators/operands in the query
    1173             :  *
    1174             :  * Followed by the operators and operands, in prefix notation. For each
    1175             :  * operand:
    1176             :  *
    1177             :  * uint8    type, QI_VAL
    1178             :  * uint8    weight
    1179             :  * uint8    prefix
    1180             :  *          operand text in client encoding, null-terminated
    1181             :  *
    1182             :  * For each operator:
    1183             :  * uint8    type, QI_OPR
    1184             :  * uint8    operator, one of OP_AND, OP_PHRASE, OP_OR, OP_NOT.
    1185             :  * uint16   distance (only for OP_PHRASE)
    1186             :  */
    1187             : Datum
    1188           0 : tsquerysend(PG_FUNCTION_ARGS)
    1189             : {
    1190           0 :     TSQuery     query = PG_GETARG_TSQUERY(0);
    1191             :     StringInfoData buf;
    1192             :     int         i;
    1193           0 :     QueryItem  *item = GETQUERY(query);
    1194             : 
    1195           0 :     pq_begintypsend(&buf);
    1196             : 
    1197           0 :     pq_sendint32(&buf, query->size);
    1198           0 :     for (i = 0; i < query->size; i++)
    1199             :     {
    1200           0 :         pq_sendint8(&buf, item->type);
    1201             : 
    1202           0 :         switch (item->type)
    1203             :         {
    1204           0 :             case QI_VAL:
    1205           0 :                 pq_sendint8(&buf, item->qoperand.weight);
    1206           0 :                 pq_sendint8(&buf, item->qoperand.prefix);
    1207           0 :                 pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
    1208           0 :                 break;
    1209           0 :             case QI_OPR:
    1210           0 :                 pq_sendint8(&buf, item->qoperator.oper);
    1211           0 :                 if (item->qoperator.oper == OP_PHRASE)
    1212           0 :                     pq_sendint16(&buf, item->qoperator.distance);
    1213           0 :                 break;
    1214           0 :             default:
    1215           0 :                 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
    1216             :         }
    1217           0 :         item++;
    1218             :     }
    1219             : 
    1220           0 :     PG_FREE_IF_COPY(query, 0);
    1221             : 
    1222           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
    1223             : }
    1224             : /*
                        * tsqueryrecv - build a TSQuery from the binary format read from the
                        * StringInfo passed as argument 0.
                        *
                        * Layout consumed, in order: a uint32 item count, then for each item a
                        * one-byte type followed by
                        *   QI_VAL: one-byte weight, one-byte prefix flag, operand string;
                        *   QI_OPR: one-byte operator code, plus an int16 distance when the
                        *           operator is OP_PHRASE.
                        *
                        * Raises ERROR for an oversized item count, an out-of-range weight, an
                        * overlong operand or operand area, an unknown operator or item type, or
                        * an operator in the last slot.  findoprnd() is then run over the items;
                        * per the comment at its call site it also checks the tree shape.
                        */
    1225             : Datum
    1226           0 : tsqueryrecv(PG_FUNCTION_ARGS)
    1227             : {
    1228           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
    1229             :     TSQuery     query;
    1230             :     int         i,
    1231             :                 len;        /* bytes for header plus QueryItem array */
    1232             :     QueryItem  *item;
    1233             :     int         datalen;    /* running total of operand bytes, terminators included */
    1234             :     char       *ptr;
    1235             :     uint32      size;       /* item count announced in the message */
    1236             :     const char **operands;  /* operands[i] is set only where item i is a QI_VAL */
    1237             :     bool        needcleanup;
    1238             : 
                           /*
                            * Bound the count so that size * sizeof(QueryItem) cannot exceed
                            * MaxAllocSize.
                            */
    1239           0 :     size = pq_getmsgint(buf, sizeof(uint32));
    1240           0 :     if (size > (MaxAllocSize / sizeof(QueryItem)))
    1241           0 :         elog(ERROR, "invalid size of tsquery");
    1242             : 
    1243             :     /* Allocate space to temporarily hold operand strings */
    1244           0 :     operands = palloc(size * sizeof(char *));
    1245             : 
    1246             :     /* Allocate space for all the QueryItems. */
    1247           0 :     len = HDRSIZETQ + sizeof(QueryItem) * size;
    1248           0 :     query = (TSQuery) palloc0(len);
    1249           0 :     query->size = size;
    1250           0 :     item = GETQUERY(query);
    1251             : 
                           /* First pass: read and validate every item, collecting operand strings. */
    1252           0 :     datalen = 0;
    1253           0 :     for (i = 0; i < size; i++)
    1254             :     {
    1255           0 :         item->type = (int8) pq_getmsgint(buf, sizeof(int8));
    1256             : 
    1257           0 :         if (item->type == QI_VAL)
    1258             :         {
    1259             :             size_t      val_len;    /* length after recoding to server
    1260             :                                      * encoding */
    1261             :             uint8       weight;
    1262             :             uint8       prefix;
    1263             :             const char *val;
    1264             :             pg_crc32    valcrc;
    1265             : 
    1266           0 :             weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
    1267           0 :             prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
    1268           0 :             val = pq_getmsgstring(buf);
    1269           0 :             val_len = strlen(val);
    1270             : 
    1271             :             /* Sanity checks */
    1272             : 
                                   /* Only the low four bits of the weight byte may be set. */
    1273           0 :             if (weight > 0xF)
    1274           0 :                 elog(ERROR, "invalid tsquery: invalid weight bitmap");
    1275             : 
    1276           0 :             if (val_len > MAXSTRLEN)
    1277           0 :                 elog(ERROR, "invalid tsquery: operand too long");
    1278             : 
                                   /*
                                    * datalen has not yet been advanced past this operand, so this
                                    * bounds the start offset stored in qoperand.distance below.
                                    */
    1279           0 :             if (datalen > MAXSTRPOS)
    1280           0 :                 elog(ERROR, "invalid tsquery: total operand length exceeded");
    1281             : 
    1282             :             /* Looks valid. */
    1283             : 
                                   /* CRC over the operand bytes (terminator excluded) for valcrc. */
    1284           0 :             INIT_LEGACY_CRC32(valcrc);
    1285           0 :             COMP_LEGACY_CRC32(valcrc, val, val_len);
    1286           0 :             FIN_LEGACY_CRC32(valcrc);
    1287             : 
    1288           0 :             item->qoperand.weight = weight;
                                   /* Any nonzero prefix byte is stored as true. */
    1289           0 :             item->qoperand.prefix = (prefix) ? true : false;
    1290           0 :             item->qoperand.valcrc = (int32) valcrc;
    1291           0 :             item->qoperand.length = val_len;
    1292           0 :             item->qoperand.distance = datalen;
    1293             : 
    1294             :             /*
    1295             :              * Operand strings are copied to the final struct after this loop;
    1296             :              * here we just collect them to an array
    1297             :              */
    1298           0 :             operands[i] = val;
    1299             : 
    1300           0 :             datalen += val_len + 1; /* + 1 for the '\0' terminator */
    1301             :         }
    1302           0 :         else if (item->type == QI_OPR)
    1303             :         {
    1304             :             int8        oper;
    1305             : 
    1306           0 :             oper = (int8) pq_getmsgint(buf, sizeof(int8));
    1307           0 :             if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
    1308           0 :                 elog(ERROR, "invalid tsquery: unrecognized operator type %d",
    1309             :                      (int) oper);
                                   /*
                                    * NOTE(review): presumably an operator's operands are stored
                                    * after it in the array, so an operator in the final slot
                                    * cannot be valid -- confirm against findoprnd().
                                    */
    1310           0 :             if (i == size - 1)
    1311           0 :                 elog(ERROR, "invalid pointer to right operand");
    1312             : 
    1313           0 :             item->qoperator.oper = oper;
                                   /* Only OP_PHRASE carries a distance field in the message. */
    1314           0 :             if (oper == OP_PHRASE)
    1315           0 :                 item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
    1316             :         }
    1317             :         else
    1318           0 :             elog(ERROR, "unrecognized tsquery node type: %d", item->type);
    1319             : 
    1320           0 :         item++;
    1321             :     }
    1322             : 
    1323             :     /* Enlarge buffer to make room for the operand values. */
    1324           0 :     query = (TSQuery) repalloc(query, len + datalen);
                           /* query was reassigned from repalloc's result; re-derive item and ptr. */
    1325           0 :     item = GETQUERY(query);
    1326           0 :     ptr = GETOPERAND(query);
    1327             : 
    1328             :     /*
    1329             :      * Fill in the left-pointers. Checks that the tree is well-formed as a
    1330             :      * side-effect.
    1331             :      */
    1332           0 :     findoprnd(item, size, &needcleanup);
    1333             : 
    1334             :     /* Can't have found any QI_VALSTOP nodes */
    1335             :     Assert(!needcleanup);
    1336             : 
    1337             :     /* Copy operands to output struct */
                           /*
                            * Strings are written back to back in item order, which is the same
                            * order in which the qoperand.distance offsets were assigned above.
                            * The "+ 1" copies each string's terminator as well.
                            */
    1338           0 :     for (i = 0; i < size; i++)
    1339             :     {
    1340           0 :         if (item->type == QI_VAL)
    1341             :         {
    1342           0 :             memcpy(ptr, operands[i], item->qoperand.length + 1);
    1343           0 :             ptr += item->qoperand.length + 1;
    1344             :         }
    1345           0 :         item++;
    1346             :     }
    1347             : 
    1348           0 :     pfree(operands);
    1349             : 
                           /* Every byte counted in datalen must have been written. */
    1350             :     Assert(ptr - GETOPERAND(query) == datalen);
    1351             : 
    1352           0 :     SET_VARSIZE(query, len + datalen);
    1353             : 
    1354           0 :     PG_RETURN_TSQUERY(query);
    1355             : }
    1356             : 
    1357             : /*
    1358             :  * tsquerytree - debugging aid: render, as text, the form of the query that
    1359             :  * would be executed on non-leaf pages of an index.
                        *
                        * Returns an empty text value for an empty query, the text "T" when
                        * clean_NOT() returns NULL, and otherwise the text that infix() writes
                        * for the item array clean_NOT() returned.
    1360             :  */
    1361             : Datum
    1362           0 : tsquerytree(PG_FUNCTION_ARGS)
    1363             : {
    1364           0 :     TSQuery     query = PG_GETARG_TSQUERY(0);
    1365             :     INFIX       nrm;
    1366             :     text       *res;
    1367             :     QueryItem  *q;
    1368             :     int         len;        /* output argument of clean_NOT(); not read afterwards */
    1369             : 
                           /*
                            * Empty query: return a text datum that consists of the varlena
                            * header only.  This path returns before the PG_FREE_IF_COPY below.
                            */
    1370           0 :     if (query->size == 0)
    1371             :     {
    1372           0 :         res = (text *) palloc(VARHDRSZ);
    1373           0 :         SET_VARSIZE(res, VARHDRSZ);
    1374           0 :         PG_RETURN_POINTER(res);
    1375             :     }
    1376             : 
                           /*
                            * NOTE(review): clean_NOT() presumably returns a newly allocated,
                            * rewritten copy of the item array (it is pfree'd below), or NULL when
                            * nothing remains -- confirm against its definition.
                            */
    1377           0 :     q = clean_NOT(GETQUERY(query), &len);
    1378             : 
    1379           0 :     if (!q)
    1380             :     {
    1381           0 :         res = cstring_to_text("T");
    1382             :     }
    1383             :     else
    1384             :     {
                               /*
                                * Start with a 32-byte buffer holding an empty string, then let
                                * infix() write the textual form of q into it.  NOTE(review):
                                * infix() presumably grows the buffer as needed -- confirm.
                                */
    1385           0 :         nrm.curpol = q;
    1386           0 :         nrm.buflen = 32;
    1387           0 :         nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
    1388           0 :         *(nrm.cur) = '\0';
    1389           0 :         nrm.op = GETOPERAND(query);
    1390           0 :         infix(&nrm, -1, false);
                               /* The result length is the distance from nrm.buf to nrm.cur. */
    1391           0 :         res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
    1392           0 :         pfree(q);
    1393             :     }
    1394             : 
    1395           0 :     PG_FREE_IF_COPY(query, 0);
    1396             : 
    1397           0 :     PG_RETURN_TEXT_P(res);
    1398             : }

Generated by: LCOV version 1.14