LCOV - code coverage report
Current view: top level - contrib/pg_trgm - trgm_gin.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 106 135 78.5 %
Date: 2020-06-01 08:06:25 Functions: 8 10 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * contrib/pg_trgm/trgm_gin.c
       3             :  */
       4             : #include "postgres.h"
       5             : 
       6             : #include "access/gin.h"
       7             : #include "access/stratnum.h"
       8             : #include "fmgr.h"
       9             : #include "trgm.h"
      10             : 
      11           0 : PG_FUNCTION_INFO_V1(gin_extract_trgm);
      12           8 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
      13           8 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
      14           8 : PG_FUNCTION_INFO_V1(gin_trgm_consistent);
      15           8 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
      16             : 
      17             : /*
      18             :  * This function can only be called if a pre-9.1 version of the GIN operator
      19             :  * class definition is present in the catalogs (probably as a consequence
      20             :  * of upgrade-in-place).  Cope by dispatching on the argument count.
      21             :  */
      22             : Datum
      23           0 : gin_extract_trgm(PG_FUNCTION_ARGS)
      24             : {
      25           0 :     if (PG_NARGS() == 3)
      26           0 :         return gin_extract_value_trgm(fcinfo);
      27           0 :     if (PG_NARGS() == 7)
      28           0 :         return gin_extract_query_trgm(fcinfo);
      29           0 :     elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
      30             :     PG_RETURN_NULL();
      31             : }
      32             : 
      33             : Datum
      34        4796 : gin_extract_value_trgm(PG_FUNCTION_ARGS)
      35             : {
      36        4796 :     text       *val = (text *) PG_GETARG_TEXT_PP(0);
      37        4796 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      38        4796 :     Datum      *entries = NULL;
      39             :     TRGM       *trg;
      40             :     int32       trglen;
      41             : 
      42        4796 :     *nentries = 0;
      43             : 
      44        4796 :     trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
      45        4796 :     trglen = ARRNELEM(trg);
      46             : 
      47        4796 :     if (trglen > 0)
      48             :     {
      49             :         trgm       *ptr;
      50             :         int32       i;
      51             : 
      52        4796 :         *nentries = trglen;
      53        4796 :         entries = (Datum *) palloc(sizeof(Datum) * trglen);
      54             : 
      55        4796 :         ptr = GETARR(trg);
      56       71164 :         for (i = 0; i < trglen; i++)
      57             :         {
      58       66368 :             int32       item = trgm2int(ptr);
      59             : 
      60       66368 :             entries[i] = Int32GetDatum(item);
      61       66368 :             ptr++;
      62             :         }
      63             :     }
      64             : 
      65        4796 :     PG_RETURN_POINTER(entries);
      66             : }
      67             : 
      68             : Datum
      69         288 : gin_extract_query_trgm(PG_FUNCTION_ARGS)
      70             : {
      71         288 :     text       *val = (text *) PG_GETARG_TEXT_PP(0);
      72         288 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      73         288 :     StrategyNumber strategy = PG_GETARG_UINT16(2);
      74             : 
      75             :     /* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
      76         288 :     Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
      77             : 
      78             :     /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
      79         288 :     int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
      80         288 :     Datum      *entries = NULL;
      81             :     TRGM       *trg;
      82             :     int32       trglen;
      83             :     trgm       *ptr;
      84             :     TrgmPackedGraph *graph;
      85             :     int32       i;
      86             : 
      87         288 :     switch (strategy)
      88             :     {
      89         108 :         case SimilarityStrategyNumber:
      90             :         case WordSimilarityStrategyNumber:
      91             :         case StrictWordSimilarityStrategyNumber:
      92         108 :             trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
      93         108 :             break;
      94          96 :         case ILikeStrategyNumber:
      95             : #ifndef IGNORECASE
      96             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
      97             : #endif
      98             :             /* FALL THRU */
      99             :         case LikeStrategyNumber:
     100             : 
     101             :             /*
     102             :              * For wildcard search we extract all the trigrams that every
     103             :              * potentially-matching string must include.
     104             :              */
     105          96 :             trg = generate_wildcard_trgm(VARDATA_ANY(val),
     106          96 :                                          VARSIZE_ANY_EXHDR(val));
     107          96 :             break;
     108          84 :         case RegExpICaseStrategyNumber:
     109             : #ifndef IGNORECASE
     110             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     111             : #endif
     112             :             /* FALL THRU */
     113             :         case RegExpStrategyNumber:
     114          84 :             trg = createTrgmNFA(val, PG_GET_COLLATION(),
     115             :                                 &graph, CurrentMemoryContext);
     116         152 :             if (trg && ARRNELEM(trg) > 0)
     117             :             {
     118             :                 /*
     119             :                  * Successful regex processing: store NFA-like graph as
     120             :                  * extra_data.  GIN API requires an array of nentries
     121             :                  * Pointers, but we just put the same value in each element.
     122             :                  */
     123          68 :                 trglen = ARRNELEM(trg);
     124          68 :                 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
     125        1696 :                 for (i = 0; i < trglen; i++)
     126        1628 :                     (*extra_data)[i] = (Pointer) graph;
     127             :             }
     128             :             else
     129             :             {
     130             :                 /* No result: have to do full index scan. */
     131          16 :                 *nentries = 0;
     132          16 :                 *searchMode = GIN_SEARCH_MODE_ALL;
     133          16 :                 PG_RETURN_POINTER(entries);
     134             :             }
     135          68 :             break;
     136           0 :         default:
     137           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     138             :             trg = NULL;         /* keep compiler quiet */
     139             :             break;
     140             :     }
     141             : 
     142         272 :     trglen = ARRNELEM(trg);
     143         272 :     *nentries = trglen;
     144             : 
     145         272 :     if (trglen > 0)
     146             :     {
     147         224 :         entries = (Datum *) palloc(sizeof(Datum) * trglen);
     148         224 :         ptr = GETARR(trg);
     149        2956 :         for (i = 0; i < trglen; i++)
     150             :         {
     151        2732 :             int32       item = trgm2int(ptr);
     152             : 
     153        2732 :             entries[i] = Int32GetDatum(item);
     154        2732 :             ptr++;
     155             :         }
     156             :     }
     157             : 
     158             :     /*
     159             :      * If no trigram was extracted then we have to scan the whole index.
     160             :      */
     161         272 :     if (trglen == 0)
     162          48 :         *searchMode = GIN_SEARCH_MODE_ALL;
     163             : 
     164         272 :     PG_RETURN_POINTER(entries);
     165             : }
     166             : 
     167             : Datum
     168          16 : gin_trgm_consistent(PG_FUNCTION_ARGS)
     169             : {
     170          16 :     bool       *check = (bool *) PG_GETARG_POINTER(0);
     171          16 :     StrategyNumber strategy = PG_GETARG_UINT16(1);
     172             : 
     173             :     /* text    *query = PG_GETARG_TEXT_PP(2); */
     174          16 :     int32       nkeys = PG_GETARG_INT32(3);
     175          16 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     176          16 :     bool       *recheck = (bool *) PG_GETARG_POINTER(5);
     177             :     bool        res;
     178             :     int32       i,
     179             :                 ntrue;
     180             :     double      nlimit;
     181             : 
     182             :     /* All cases served by this function are inexact */
     183          16 :     *recheck = true;
     184             : 
     185          16 :     switch (strategy)
     186             :     {
     187           0 :         case SimilarityStrategyNumber:
     188             :         case WordSimilarityStrategyNumber:
     189             :         case StrictWordSimilarityStrategyNumber:
     190           0 :             nlimit = index_strategy_get_limit(strategy);
     191             : 
     192             :             /* Count the matches */
     193           0 :             ntrue = 0;
     194           0 :             for (i = 0; i < nkeys; i++)
     195             :             {
     196           0 :                 if (check[i])
     197           0 :                     ntrue++;
     198             :             }
     199             : 
     200             :             /*--------------------
     201             :              * If DIVUNION is defined then similarity formula is:
     202             :              * c / (len1 + len2 - c)
     203             :              * where c is number of common trigrams and it stands as ntrue in
     204             :              * this code.  Here we don't know value of len2 but we can assume
     205             :              * that c (ntrue) is a lower bound of len2, so upper bound of
     206             :              * similarity is:
     207             :              * c / (len1 + c - c)  => c / len1
     208             :              * If DIVUNION is not defined then similarity formula is:
     209             :              * c / max(len1, len2)
     210             :              * And again, c (ntrue) is a lower bound of len2, but c <= len1
     211             :              * just by definition and, consequently, upper bound of
     212             :              * similarity is just c / len1.
     213             :              * So, independently of DIVUNION, the upper bound formula is the same.
     214             :              */
     215           0 :             res = (nkeys == 0) ? false :
     216           0 :                 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
     217           0 :             break;
     218          16 :         case ILikeStrategyNumber:
     219             : #ifndef IGNORECASE
     220             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
     221             : #endif
     222             :             /* FALL THRU */
     223             :         case LikeStrategyNumber:
     224             :             /* Check if all extracted trigrams are present. */
     225          16 :             res = true;
     226          32 :             for (i = 0; i < nkeys; i++)
     227             :             {
     228          16 :                 if (!check[i])
     229             :                 {
     230           0 :                     res = false;
     231           0 :                     break;
     232             :                 }
     233             :             }
     234          16 :             break;
     235           0 :         case RegExpICaseStrategyNumber:
     236             : #ifndef IGNORECASE
     237             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     238             : #endif
     239             :             /* FALL THRU */
     240             :         case RegExpStrategyNumber:
     241           0 :             if (nkeys < 1)
     242             :             {
     243             :                 /* Regex processing gave no result: do full index scan */
     244           0 :                 res = true;
     245             :             }
     246             :             else
     247           0 :                 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
     248             :                                          check);
     249           0 :             break;
     250           0 :         default:
     251           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     252             :             res = false;        /* keep compiler quiet */
     253             :             break;
     254             :     }
     255             : 
     256          16 :     PG_RETURN_BOOL(res);
     257             : }
     258             : 
     259             : /*
     260             :  * In all cases, GIN_TRUE is at least as favorable to inclusion as
     261             :  * GIN_MAYBE. If no better option is available, simply treat
     262             :  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
     263             :  * consistent function.
     264             :  */
     265             : Datum
     266       27210 : gin_trgm_triconsistent(PG_FUNCTION_ARGS)
     267             : {
     268       27210 :     GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
     269       27210 :     StrategyNumber strategy = PG_GETARG_UINT16(1);
     270             : 
     271             :     /* text    *query = PG_GETARG_TEXT_PP(2); */
     272       27210 :     int32       nkeys = PG_GETARG_INT32(3);
     273       27210 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     274       27210 :     GinTernaryValue res = GIN_MAYBE;
     275             :     int32       i,
     276             :                 ntrue;
     277             :     bool       *boolcheck;
     278             :     double      nlimit;
     279             : 
     280       27210 :     switch (strategy)
     281             :     {
     282       16382 :         case SimilarityStrategyNumber:
     283             :         case WordSimilarityStrategyNumber:
     284             :         case StrictWordSimilarityStrategyNumber:
     285       16382 :             nlimit = index_strategy_get_limit(strategy);
     286             : 
     287             :             /* Count the matches */
     288       16382 :             ntrue = 0;
     289      173818 :             for (i = 0; i < nkeys; i++)
     290             :             {
     291      157436 :                 if (check[i] != GIN_FALSE)
     292       66344 :                     ntrue++;
     293             :             }
     294             : 
     295             :             /*
     296             :              * See comment in gin_trgm_consistent() about upper bound
     297             :              * formula
     298             :              */
     299       32764 :             res = (nkeys == 0)
     300       16382 :                 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
     301             :                                ? GIN_MAYBE : GIN_FALSE);
     302       16382 :             break;
     303        8038 :         case ILikeStrategyNumber:
     304             : #ifndef IGNORECASE
     305             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
     306             : #endif
     307             :             /* FALL THRU */
     308             :         case LikeStrategyNumber:
     309             :             /* Check if all extracted trigrams are present. */
     310        8038 :             res = GIN_MAYBE;
     311       16076 :             for (i = 0; i < nkeys; i++)
     312             :             {
     313        8048 :                 if (check[i] == GIN_FALSE)
     314             :                 {
     315          10 :                     res = GIN_FALSE;
     316          10 :                     break;
     317             :                 }
     318             :             }
     319        8038 :             break;
     320        2790 :         case RegExpICaseStrategyNumber:
     321             : #ifndef IGNORECASE
     322             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     323             : #endif
     324             :             /* FALL THRU */
     325             :         case RegExpStrategyNumber:
     326        2790 :             if (nkeys < 1)
     327             :             {
     328             :                 /* Regex processing gave no result: do full index scan */
     329          32 :                 res = GIN_MAYBE;
     330             :             }
     331             :             else
     332             :             {
     333             :                 /*
     334             :                  * As trigramsMatchGraph implements a monotonic boolean
     335             :                  * function, promoting all GIN_MAYBE keys to GIN_TRUE will
     336             :                  * give a conservative result.
     337             :                  */
     338        2758 :                 boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
     339      638446 :                 for (i = 0; i < nkeys; i++)
     340      635688 :                     boolcheck[i] = (check[i] != GIN_FALSE);
     341        2758 :                 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
     342             :                                         boolcheck))
     343          12 :                     res = GIN_FALSE;
     344        2758 :                 pfree(boolcheck);
     345             :             }
     346        2790 :             break;
     347           0 :         default:
     348           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     349             :             res = GIN_FALSE;    /* keep compiler quiet */
     350             :             break;
     351             :     }
     352             : 
     353             :     /* All cases served by this function are inexact */
     354             :     Assert(res != GIN_TRUE);
     355       27210 :     PG_RETURN_GIN_TERNARY_VALUE(res);
     356             : }

Generated by: LCOV version 1.13