LCOV - code coverage report
Current view: top level - contrib/pg_trgm - trgm_gin.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 85 123 69.1 %
Date: 2019-09-19 02:07:14 Functions: 7 10 70.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * contrib/pg_trgm/trgm_gin.c
       3             :  */
       4             : #include "postgres.h"
       5             : 
       6             : #include "trgm.h"
       7             : 
       8             : #include "access/gin.h"
       9             : #include "access/stratnum.h"
      10             : #include "fmgr.h"
      11             : 
      12             : 
      13           0 : PG_FUNCTION_INFO_V1(gin_extract_trgm);	/* legacy entry point: forwards to one of the two extract functions by argument count */
      14           8 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm);	/* builds the int32 trigram keys for an indexed text value */
      15           8 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm);	/* builds the trigram keys for a query, depending on the strategy */
      16           8 : PG_FUNCTION_INFO_V1(gin_trgm_consistent);	/* boolean consistency check; always requests a recheck */
      17           8 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);	/* ternary consistency check; never answers GIN_TRUE */
      18             : 
      19             : /*
      20             :  * This function can only be called if a pre-9.1 version of the GIN operator
      21             :  * class definition is present in the catalogs (probably as a consequence
      22             :  * of upgrade-in-place).  Cope.
      23             :  */
      24             : Datum
      25           0 : gin_extract_trgm(PG_FUNCTION_ARGS)
      26             : {
      27           0 :     if (PG_NARGS() == 3)
      28           0 :         return gin_extract_value_trgm(fcinfo);
      29           0 :     if (PG_NARGS() == 7)
      30           0 :         return gin_extract_query_trgm(fcinfo);
      31           0 :     elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
      32             :     PG_RETURN_NULL();
      33             : }
      34             : 
      35             : Datum
      36        4792 : gin_extract_value_trgm(PG_FUNCTION_ARGS)
      37             : {
      38        4792 :     text       *val = (text *) PG_GETARG_TEXT_PP(0);
      39        4792 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      40        4792 :     Datum      *entries = NULL;
      41             :     TRGM       *trg;
      42             :     int32       trglen;
      43             : 
      44        4792 :     *nentries = 0;
      45             : 
      46        4792 :     trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
      47        4792 :     trglen = ARRNELEM(trg);
      48             : 
      49        4792 :     if (trglen > 0)
      50             :     {
      51             :         trgm       *ptr;
      52             :         int32       i;
      53             : 
      54        4792 :         *nentries = trglen;
      55        4792 :         entries = (Datum *) palloc(sizeof(Datum) * trglen);
      56             : 
      57        4792 :         ptr = GETARR(trg);
      58       71124 :         for (i = 0; i < trglen; i++)
      59             :         {
      60       66332 :             int32       item = trgm2int(ptr);
      61             : 
      62       66332 :             entries[i] = Int32GetDatum(item);
      63       66332 :             ptr++;
      64             :         }
      65             :     }
      66             : 
      67        4792 :     PG_RETURN_POINTER(entries);
      68             : }
      69             : 
      70             : Datum
      71         224 : gin_extract_query_trgm(PG_FUNCTION_ARGS)
      72             : {
      73         224 :     text       *val = (text *) PG_GETARG_TEXT_PP(0);
      74         224 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      75         224 :     StrategyNumber strategy = PG_GETARG_UINT16(2);
      76             : 
      77             :     /* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
      78         224 :     Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
      79             : 
      80             :     /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
      81         224 :     int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
      82         224 :     Datum      *entries = NULL;
      83             :     TRGM       *trg;
      84             :     int32       trglen;
      85             :     trgm       *ptr;
      86             :     TrgmPackedGraph *graph;
      87             :     int32       i;
      88             : 
      89         224 :     switch (strategy)
      90             :     {
      91             :         case SimilarityStrategyNumber:
      92             :         case WordSimilarityStrategyNumber:
      93             :         case StrictWordSimilarityStrategyNumber:
      94         108 :             trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
      95         108 :             break;
      96             :         case ILikeStrategyNumber:
      97             : #ifndef IGNORECASE
      98             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
      99             : #endif
     100             :             /* FALL THRU */
     101             :         case LikeStrategyNumber:
     102             : 
     103             :             /*
     104             :              * For wildcard search we extract all the trigrams that every
     105             :              * potentially-matching string must include.
     106             :              */
     107          96 :             trg = generate_wildcard_trgm(VARDATA_ANY(val),
     108          96 :                                          VARSIZE_ANY_EXHDR(val));
     109          32 :             break;
     110             :         case RegExpICaseStrategyNumber:
     111             : #ifndef IGNORECASE
     112             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     113             : #endif
     114             :             /* FALL THRU */
     115             :         case RegExpStrategyNumber:
     116          84 :             trg = createTrgmNFA(val, PG_GET_COLLATION(),
     117             :                                 &graph, CurrentMemoryContext);
     118         152 :             if (trg && ARRNELEM(trg) > 0)
     119             :             {
     120             :                 /*
     121             :                  * Successful regex processing: store NFA-like graph as
     122             :                  * extra_data.  GIN API requires an array of nentries
     123             :                  * Pointers, but we just put the same value in each element.
     124             :                  */
     125          68 :                 trglen = ARRNELEM(trg);
     126          68 :                 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
     127        1696 :                 for (i = 0; i < trglen; i++)
     128        1628 :                     (*extra_data)[i] = (Pointer) graph;
     129             :             }
     130             :             else
     131             :             {
     132             :                 /* No result: have to do full index scan. */
     133          16 :                 *nentries = 0;
     134          16 :                 *searchMode = GIN_SEARCH_MODE_ALL;
     135          16 :                 PG_RETURN_POINTER(entries);
     136             :             }
     137          68 :             break;
     138             :         default:
     139           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     140             :             trg = NULL;         /* keep compiler quiet */
     141             :             break;
     142             :     }
     143             : 
     144         208 :     trglen = ARRNELEM(trg);
     145         208 :     *nentries = trglen;
     146             : 
     147         208 :     if (trglen > 0)
     148             :     {
     149         208 :         entries = (Datum *) palloc(sizeof(Datum) * trglen);
     150         208 :         ptr = GETARR(trg);
     151        2876 :         for (i = 0; i < trglen; i++)
     152             :         {
     153        2668 :             int32       item = trgm2int(ptr);
     154             : 
     155        2668 :             entries[i] = Int32GetDatum(item);
     156        2668 :             ptr++;
     157             :         }
     158             :     }
     159             : 
     160             :     /*
     161             :      * If no trigram was extracted then we have to scan all the index.
     162             :      */
     163         208 :     if (trglen == 0)
     164           0 :         *searchMode = GIN_SEARCH_MODE_ALL;
     165             : 
     166         208 :     PG_RETURN_POINTER(entries);
     167             : }
     168             : 
     169             : Datum
     170           0 : gin_trgm_consistent(PG_FUNCTION_ARGS)
     171             : {
     172           0 :     bool       *check = (bool *) PG_GETARG_POINTER(0);
     173           0 :     StrategyNumber strategy = PG_GETARG_UINT16(1);
     174             : 
     175             :     /* text    *query = PG_GETARG_TEXT_PP(2); */
     176           0 :     int32       nkeys = PG_GETARG_INT32(3);
     177           0 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     178           0 :     bool       *recheck = (bool *) PG_GETARG_POINTER(5);
     179             :     bool        res;
     180             :     int32       i,
     181             :                 ntrue;
     182             :     double      nlimit;
     183             : 
     184             :     /* All cases served by this function are inexact */
     185           0 :     *recheck = true;
     186             : 
     187           0 :     switch (strategy)
     188             :     {
     189             :         case SimilarityStrategyNumber:
     190             :         case WordSimilarityStrategyNumber:
     191             :         case StrictWordSimilarityStrategyNumber:
     192           0 :             nlimit = index_strategy_get_limit(strategy);
     193             : 
     194             :             /* Count the matches */
     195           0 :             ntrue = 0;
     196           0 :             for (i = 0; i < nkeys; i++)
     197             :             {
     198           0 :                 if (check[i])
     199           0 :                     ntrue++;
     200             :             }
     201             : 
     202             :             /*--------------------
     203             :              * If DIVUNION is defined then similarity formula is:
     204             :              * c / (len1 + len2 - c)
     205             :              * where c is number of common trigrams and it stands as ntrue in
     206             :              * this code.  Here we don't know value of len2 but we can assume
     207             :              * that c (ntrue) is a lower bound of len2, so upper bound of
     208             :              * similarity is:
     209             :              * c / (len1 + c - c)  => c / len1
     210             :              * If DIVUNION is not defined then similarity formula is:
     211             :              * c / max(len1, len2)
     212             :              * And again, c (ntrue) is a lower bound of len2, but c <= len1
     213             :              * just by definition and, consequently, upper bound of
     214             :              * similarity is just c / len1.
     215             :              * So, independently on DIVUNION the upper bound formula is the same.
     216             :              */
     217           0 :             res = (nkeys == 0) ? false :
     218           0 :                 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
     219           0 :             break;
     220             :         case ILikeStrategyNumber:
     221             : #ifndef IGNORECASE
     222             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
     223             : #endif
     224             :             /* FALL THRU */
     225             :         case LikeStrategyNumber:
     226             :             /* Check if all extracted trigrams are presented. */
     227           0 :             res = true;
     228           0 :             for (i = 0; i < nkeys; i++)
     229             :             {
     230           0 :                 if (!check[i])
     231             :                 {
     232           0 :                     res = false;
     233           0 :                     break;
     234             :                 }
     235             :             }
     236           0 :             break;
     237             :         case RegExpICaseStrategyNumber:
     238             : #ifndef IGNORECASE
     239             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     240             : #endif
     241             :             /* FALL THRU */
     242             :         case RegExpStrategyNumber:
     243           0 :             if (nkeys < 1)
     244             :             {
     245             :                 /* Regex processing gave no result: do full index scan */
     246           0 :                 res = true;
     247             :             }
     248             :             else
     249           0 :                 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
     250             :                                          check);
     251           0 :             break;
     252             :         default:
     253           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     254             :             res = false;        /* keep compiler quiet */
     255             :             break;
     256             :     }
     257             : 
     258           0 :     PG_RETURN_BOOL(res);
     259             : }
     260             : 
     261             : /*
     262             :  * In all cases, GIN_TRUE is at least as favorable to inclusion as
     263             :  * GIN_MAYBE. If no better option is available, simply treat
     264             :  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
     265             :  * consistent function.
     266             :  */
     267             : Datum
     268       19192 : gin_trgm_triconsistent(PG_FUNCTION_ARGS)
     269             : {
     270       19192 :     GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
     271       19192 :     StrategyNumber strategy = PG_GETARG_UINT16(1);
     272             : 
     273             :     /* text    *query = PG_GETARG_TEXT_PP(2); */
     274       19192 :     int32       nkeys = PG_GETARG_INT32(3);
     275       19192 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     276       19192 :     GinTernaryValue res = GIN_MAYBE;
     277             :     int32       i,
     278             :                 ntrue;
     279             :     bool       *boolcheck;
     280             :     double      nlimit;
     281             : 
     282       19192 :     switch (strategy)
     283             :     {
     284             :         case SimilarityStrategyNumber:
     285             :         case WordSimilarityStrategyNumber:
     286             :         case StrictWordSimilarityStrategyNumber:
     287       16382 :             nlimit = index_strategy_get_limit(strategy);
     288             : 
     289             :             /* Count the matches */
     290       16382 :             ntrue = 0;
     291      173818 :             for (i = 0; i < nkeys; i++)
     292             :             {
     293      157436 :                 if (check[i] != GIN_FALSE)
     294       66344 :                     ntrue++;
     295             :             }
     296             : 
     297             :             /*
     298             :              * See comment in gin_trgm_consistent() about * upper bound
     299             :              * formula
     300             :              */
     301       32764 :             res = (nkeys == 0)
     302       16382 :                 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
     303             :                                ? GIN_MAYBE : GIN_FALSE);
     304       16382 :             break;
     305             :         case ILikeStrategyNumber:
     306             : #ifndef IGNORECASE
     307             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
     308             : #endif
     309             :             /* FALL THRU */
     310             :         case LikeStrategyNumber:
     311             :             /* Check if all extracted trigrams are presented. */
     312          20 :             res = GIN_MAYBE;
     313          58 :             for (i = 0; i < nkeys; i++)
     314             :             {
     315          44 :                 if (check[i] == GIN_FALSE)
     316             :                 {
     317           6 :                     res = GIN_FALSE;
     318           6 :                     break;
     319             :                 }
     320             :             }
     321          20 :             break;
     322             :         case RegExpICaseStrategyNumber:
     323             : #ifndef IGNORECASE
     324             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     325             : #endif
     326             :             /* FALL THRU */
     327             :         case RegExpStrategyNumber:
     328        2790 :             if (nkeys < 1)
     329             :             {
     330             :                 /* Regex processing gave no result: do full index scan */
     331          32 :                 res = GIN_MAYBE;
     332             :             }
     333             :             else
     334             :             {
     335             :                 /*
     336             :                  * As trigramsMatchGraph implements a monotonic boolean
     337             :                  * function, promoting all GIN_MAYBE keys to GIN_TRUE will
     338             :                  * give a conservative result.
     339             :                  */
     340        2758 :                 boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
     341      638446 :                 for (i = 0; i < nkeys; i++)
     342      635688 :                     boolcheck[i] = (check[i] != GIN_FALSE);
     343        2758 :                 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
     344             :                                         boolcheck))
     345          12 :                     res = GIN_FALSE;
     346        2758 :                 pfree(boolcheck);
     347             :             }
     348        2790 :             break;
     349             :         default:
     350           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     351             :             res = GIN_FALSE;    /* keep compiler quiet */
     352             :             break;
     353             :     }
     354             : 
     355             :     /* All cases served by this function are inexact */
     356             :     Assert(res != GIN_TRUE);
     357       19192 :     PG_RETURN_GIN_TERNARY_VALUE(res);
     358             : }

Generated by: LCOV version 1.13