LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsginidx.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 139 158 88.0 %
Date: 2020-05-29 00:07:09 Functions: 9 14 64.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * tsginidx.c
       4             :  *   GIN support functions for tsvector_ops
       5             :  *
       6             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/tsginidx.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "access/gin.h"
      17             : #include "access/stratnum.h"
      18             : #include "miscadmin.h"
      19             : #include "tsearch/ts_type.h"
      20             : #include "tsearch/ts_utils.h"
      21             : #include "utils/builtins.h"
      22             : 
      23             : 
      24             : Datum
      25     1206376 : gin_cmp_tslexeme(PG_FUNCTION_ARGS)
      26             : {
      27     1206376 :     text       *a = PG_GETARG_TEXT_PP(0);
      28     1206376 :     text       *b = PG_GETARG_TEXT_PP(1);
      29             :     int         cmp;
      30             : 
      31     2412752 :     cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
      32     2412752 :                           VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
      33             :                           false);
      34             : 
      35     1206376 :     PG_FREE_IF_COPY(a, 0);
      36     1206376 :     PG_FREE_IF_COPY(b, 1);
      37     1206376 :     PG_RETURN_INT32(cmp);
      38             : }
      39             : 
      40             : Datum
      41         296 : gin_cmp_prefix(PG_FUNCTION_ARGS)
      42             : {
      43         296 :     text       *a = PG_GETARG_TEXT_PP(0);
      44         296 :     text       *b = PG_GETARG_TEXT_PP(1);
      45             : 
      46             : #ifdef NOT_USED
      47             :     StrategyNumber strategy = PG_GETARG_UINT16(2);
      48             :     Pointer     extra_data = PG_GETARG_POINTER(3);
      49             : #endif
      50             :     int         cmp;
      51             : 
      52         592 :     cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
      53         592 :                           VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
      54             :                           true);
      55             : 
      56         296 :     if (cmp < 0)
      57           8 :         cmp = 1;                /* prevent continue scan */
      58             : 
      59         296 :     PG_FREE_IF_COPY(a, 0);
      60         296 :     PG_FREE_IF_COPY(b, 1);
      61         296 :     PG_RETURN_INT32(cmp);
      62             : }
      63             : 
      64             : Datum
      65        2064 : gin_extract_tsvector(PG_FUNCTION_ARGS)
      66             : {
      67        2064 :     TSVector    vector = PG_GETARG_TSVECTOR(0);
      68        2064 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      69        2064 :     Datum      *entries = NULL;
      70             : 
      71        2064 :     *nentries = vector->size;
      72        2064 :     if (vector->size > 0)
      73             :     {
      74             :         int         i;
      75        2028 :         WordEntry  *we = ARRPTR(vector);
      76             : 
      77        2028 :         entries = (Datum *) palloc(sizeof(Datum) * vector->size);
      78             : 
      79      117316 :         for (i = 0; i < vector->size; i++)
      80             :         {
      81             :             text       *txt;
      82             : 
      83      115288 :             txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
      84      115288 :             entries[i] = PointerGetDatum(txt);
      85             : 
      86      115288 :             we++;
      87             :         }
      88             :     }
      89             : 
      90        2064 :     PG_FREE_IF_COPY(vector, 0);
      91        2064 :     PG_RETURN_POINTER(entries);
      92             : }
      93             : 
      94             : Datum
      95         268 : gin_extract_tsquery(PG_FUNCTION_ARGS)
      96             : {
      97         268 :     TSQuery     query = PG_GETARG_TSQUERY(0);
      98         268 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      99             : 
     100             :     /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
     101         268 :     bool      **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
     102         268 :     Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
     103             : 
     104             :     /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
     105         268 :     int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
     106         268 :     Datum      *entries = NULL;
     107             : 
     108         268 :     *nentries = 0;
     109             : 
     110         268 :     if (query->size > 0)
     111             :     {
     112         268 :         QueryItem  *item = GETQUERY(query);
     113             :         int32       i,
     114             :                     j;
     115             :         bool       *partialmatch;
     116             :         int        *map_item_operand;
     117             : 
     118             :         /*
     119             :          * If the query doesn't have any required positive matches (for
     120             :          * instance, it's something like '! foo'), we have to do a full index
     121             :          * scan.
     122             :          */
     123         268 :         if (tsquery_requires_match(item))
     124         204 :             *searchMode = GIN_SEARCH_MODE_DEFAULT;
     125             :         else
     126          64 :             *searchMode = GIN_SEARCH_MODE_ALL;
     127             : 
     128             :         /* count number of VAL items */
     129         268 :         j = 0;
     130        1056 :         for (i = 0; i < query->size; i++)
     131             :         {
     132         788 :             if (item[i].type == QI_VAL)
     133         480 :                 j++;
     134             :         }
     135         268 :         *nentries = j;
     136             : 
     137         268 :         entries = (Datum *) palloc(sizeof(Datum) * j);
     138         268 :         partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
     139             : 
     140             :         /*
     141             :          * Make map to convert item's number to corresponding operand's (the
     142             :          * same, entry's) number. Entry's number is used in check array in
     143             :          * consistent method. We use the same map for each entry.
     144             :          */
     145         268 :         *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
     146         268 :         map_item_operand = (int *) palloc0(sizeof(int) * query->size);
     147             : 
     148             :         /* Now rescan the VAL items and fill in the arrays */
     149         268 :         j = 0;
     150        1056 :         for (i = 0; i < query->size; i++)
     151             :         {
     152         788 :             if (item[i].type == QI_VAL)
     153             :             {
     154         480 :                 QueryOperand *val = &item[i].qoperand;
     155             :                 text       *txt;
     156             : 
     157         480 :                 txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
     158         480 :                                                val->length);
     159         480 :                 entries[j] = PointerGetDatum(txt);
     160         480 :                 partialmatch[j] = val->prefix;
     161         480 :                 (*extra_data)[j] = (Pointer) map_item_operand;
     162         480 :                 map_item_operand[i] = j;
     163         480 :                 j++;
     164             :             }
     165             :         }
     166             :     }
     167             : 
     168         268 :     PG_FREE_IF_COPY(query, 0);
     169             : 
     170         268 :     PG_RETURN_POINTER(entries);
     171             : }
     172             : 
     173             : typedef struct
     174             : {
     175             :     QueryItem  *first_item;
     176             :     GinTernaryValue *check;
     177             :     int        *map_item_operand;
     178             :     bool       *need_recheck;
     179             : } GinChkVal;
     180             : 
     181             : static GinTernaryValue
     182       22428 : checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
     183             : {
     184             :     int         j;
     185             : 
     186             :     /*
     187             :      * if any val requiring a weight is used or caller needs position
     188             :      * information then set recheck flag
     189             :      */
     190       22428 :     if (val->weight != 0 || data != NULL)
     191           0 :         *(gcv->need_recheck) = true;
     192             : 
     193             :     /* convert item's number to corresponding entry's (operand's) number */
     194       22428 :     j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
     195             : 
     196             :     /* return presence of current entry in indexed value */
     197       22428 :     return gcv->check[j];
     198             : }
     199             : 
     200             : /*
     201             :  * Wrapper of check condition function for TS_execute.
     202             :  */
     203             : static bool
     204          32 : checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
     205             : {
     206          32 :     return checkcondition_gin_internal((GinChkVal *) checkval,
     207             :                                        val,
     208          32 :                                        data) != GIN_FALSE;
     209             : }
     210             : 
     211             : /*
     212             :  * Evaluate tsquery boolean expression using ternary logic.
     213             :  *
     214             :  * Note: the reason we can't use TS_execute() for this is that its API
     215             :  * for the checkcondition callback doesn't allow a MAYBE result to be
     216             :  * returned, but we might have MAYBEs in the gcv->check array.
     217             :  * Perhaps we should change that API.
     218             :  */
     219             : static GinTernaryValue
     220       54932 : TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
     221             : {
     222             :     GinTernaryValue val1,
     223             :                 val2,
     224             :                 result;
     225             : 
     226             :     /* since this function recurses, it could be driven to stack overflow */
     227       54932 :     check_stack_depth();
     228             : 
     229       54932 :     if (curitem->type == QI_VAL)
     230             :         return
     231       22396 :             checkcondition_gin_internal(gcv,
     232             :                                         (QueryOperand *) curitem,
     233             :                                         NULL /* don't have position info */ );
     234             : 
     235       32536 :     switch (curitem->qoperator.oper)
     236             :     {
     237       15780 :         case OP_NOT:
     238             : 
     239             :             /*
     240             :              * Below a phrase search, force NOT's result to MAYBE.  We cannot
     241             :              * invert a TRUE result from the subexpression to FALSE, since
     242             :              * TRUE only says that the subexpression matches somewhere, not
     243             :              * that it matches everywhere, so there might be positions where
     244             :              * the NOT will match.  We could invert FALSE to TRUE, but there's
     245             :              * little point in distinguishing TRUE from MAYBE, since a recheck
     246             :              * will have been forced already.
     247             :              */
     248       15780 :             if (in_phrase)
     249        5280 :                 return GIN_MAYBE;
     250             : 
     251       10500 :             result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
     252       10500 :             if (result == GIN_MAYBE)
     253         168 :                 return result;
     254       10332 :             return !result;
     255             : 
     256       10120 :         case OP_PHRASE:
     257             : 
     258             :             /*
     259             :              * GIN doesn't contain any information about positions, so treat
     260             :              * OP_PHRASE as OP_AND with recheck requirement, and always
     261             :              * reporting MAYBE not TRUE.
     262             :              */
     263       10120 :             *(gcv->need_recheck) = true;
     264             :             /* Pass down in_phrase == true in case there's a NOT below */
     265       10120 :             in_phrase = true;
     266             : 
     267             :             /* FALL THRU */
     268             : 
     269       12300 :         case OP_AND:
     270       12300 :             val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
     271             :                                       in_phrase);
     272       12300 :             if (val1 == GIN_FALSE)
     273        6392 :                 return GIN_FALSE;
     274        5908 :             val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
     275        5908 :             if (val2 == GIN_FALSE)
     276        2408 :                 return GIN_FALSE;
     277        3500 :             if (val1 == GIN_TRUE && val2 == GIN_TRUE &&
     278         684 :                 curitem->qoperator.oper != OP_PHRASE)
     279         340 :                 return GIN_TRUE;
     280             :             else
     281        3160 :                 return GIN_MAYBE;
     282             : 
     283        4456 :         case OP_OR:
     284        4456 :             val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
     285             :                                       in_phrase);
     286        4456 :             if (val1 == GIN_TRUE)
     287        2764 :                 return GIN_TRUE;
     288        1692 :             val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
     289        1692 :             if (val2 == GIN_TRUE)
     290         876 :                 return GIN_TRUE;
     291         816 :             if (val1 == GIN_FALSE && val2 == GIN_FALSE)
     292         788 :                 return GIN_FALSE;
     293             :             else
     294          28 :                 return GIN_MAYBE;
     295             : 
     296           0 :         default:
     297           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
     298             :     }
     299             : 
     300             :     /* not reachable, but keep compiler quiet */
     301             :     return false;
     302             : }
     303             : 
     304             : Datum
     305          16 : gin_tsquery_consistent(PG_FUNCTION_ARGS)
     306             : {
     307          16 :     bool       *check = (bool *) PG_GETARG_POINTER(0);
     308             : 
     309             :     /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
     310          16 :     TSQuery     query = PG_GETARG_TSQUERY(2);
     311             : 
     312             :     /* int32    nkeys = PG_GETARG_INT32(3); */
     313          16 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     314          16 :     bool       *recheck = (bool *) PG_GETARG_POINTER(5);
     315          16 :     bool        res = false;
     316             : 
     317             :     /* Initially assume query doesn't require recheck */
     318          16 :     *recheck = false;
     319             : 
     320          16 :     if (query->size > 0)
     321             :     {
     322             :         GinChkVal   gcv;
     323             : 
     324             :         /*
     325             :          * check-parameter array has one entry for each value (operand) in the
     326             :          * query.
     327             :          */
     328          16 :         gcv.first_item = GETQUERY(query);
     329             :         StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
     330             :                          "sizes of GinTernaryValue and bool are not equal");
     331          16 :         gcv.check = (GinTernaryValue *) check;
     332          16 :         gcv.map_item_operand = (int *) (extra_data[0]);
     333          16 :         gcv.need_recheck = recheck;
     334             : 
     335          16 :         res = TS_execute(GETQUERY(query),
     336             :                          &gcv,
     337             :                          TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
     338             :                          checkcondition_gin);
     339             :     }
     340             : 
     341          16 :     PG_RETURN_BOOL(res);
     342             : }
     343             : 
     344             : Datum
     345       20076 : gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
     346             : {
     347       20076 :     GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
     348             : 
     349             :     /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
     350       20076 :     TSQuery     query = PG_GETARG_TSQUERY(2);
     351             : 
     352             :     /* int32    nkeys = PG_GETARG_INT32(3); */
     353       20076 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     354       20076 :     GinTernaryValue res = GIN_FALSE;
     355             :     bool        recheck;
     356             : 
     357             :     /* Initially assume query doesn't require recheck */
     358       20076 :     recheck = false;
     359             : 
     360       20076 :     if (query->size > 0)
     361             :     {
     362             :         GinChkVal   gcv;
     363             : 
     364             :         /*
     365             :          * check-parameter array has one entry for each value (operand) in the
     366             :          * query.
     367             :          */
     368       20076 :         gcv.first_item = GETQUERY(query);
     369       20076 :         gcv.check = check;
     370       20076 :         gcv.map_item_operand = (int *) (extra_data[0]);
     371       20076 :         gcv.need_recheck = &recheck;
     372             : 
     373       20076 :         res = TS_execute_ternary(&gcv, GETQUERY(query), false);
     374             : 
     375       20076 :         if (res == GIN_TRUE && recheck)
     376        5952 :             res = GIN_MAYBE;
     377             :     }
     378             : 
     379       20076 :     PG_RETURN_GIN_TERNARY_VALUE(res);
     380             : }
     381             : 
     382             : /*
     383             :  * Formerly, gin_extract_tsvector had only two arguments.  Now it has three,
     384             :  * but we still need a pg_proc entry with two args to support reloading
     385             :  * pre-9.1 contrib/tsearch2 opclass declarations.  This compatibility
     386             :  * function should go away eventually.  (Note: you might say "hey, but the
     387             :  * code above is only *using* two args, so let's just declare it that way".
     388             :  * If you try that you'll find the opr_sanity regression test complains.)
     389             :  */
     390             : Datum
     391           0 : gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
     392             : {
     393           0 :     if (PG_NARGS() < 3)          /* should not happen */
     394           0 :         elog(ERROR, "gin_extract_tsvector requires three arguments");
     395           0 :     return gin_extract_tsvector(fcinfo);
     396             : }
     397             : 
     398             : /*
     399             :  * Likewise, we need a stub version of gin_extract_tsquery declared with
     400             :  * only five arguments.
     401             :  */
     402             : Datum
     403           0 : gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
     404             : {
     405           0 :     if (PG_NARGS() < 7)          /* should not happen */
     406           0 :         elog(ERROR, "gin_extract_tsquery requires seven arguments");
     407           0 :     return gin_extract_tsquery(fcinfo);
     408             : }
     409             : 
     410             : /*
     411             :  * Likewise, we need a stub version of gin_tsquery_consistent declared with
     412             :  * only six arguments.
     413             :  */
     414             : Datum
     415           0 : gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
     416             : {
     417           0 :     if (PG_NARGS() < 8)          /* should not happen */
     418           0 :         elog(ERROR, "gin_tsquery_consistent requires eight arguments");
     419           0 :     return gin_tsquery_consistent(fcinfo);
     420             : }
     421             : 
     422             : /*
     423             :  * Likewise, a stub version of gin_extract_tsquery declared with argument
     424             :  * types that are no longer considered appropriate.
     425             :  */
     426             : Datum
     427           0 : gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
     428             : {
     429           0 :     return gin_extract_tsquery(fcinfo);
     430             : }
     431             : 
     432             : /*
     433             :  * Likewise, a stub version of gin_tsquery_consistent declared with argument
     434             :  * types that are no longer considered appropriate.
     435             :  */
     436             : Datum
     437           0 : gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
     438             : {
     439           0 :     return gin_tsquery_consistent(fcinfo);
     440             : }

Generated by: LCOV version 1.13