LCOV - code coverage report
Current view: top level - contrib/pg_trgm - trgm_gin.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 106 135 78.5 %
Date: 2020-12-05 17:06:23 Functions: 8 10 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * contrib/pg_trgm/trgm_gin.c
       3             :  */
       4             : #include "postgres.h"
       5             : 
       6             : #include "access/gin.h"
       7             : #include "access/stratnum.h"
       8             : #include "fmgr.h"
       9             : #include "trgm.h"
      10             : 
      11           0 : PG_FUNCTION_INFO_V1(gin_extract_trgm);
      12           8 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
      13           8 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
      14           8 : PG_FUNCTION_INFO_V1(gin_trgm_consistent);
      15           8 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
      16             : 
      17             : /*
      18             :  * This function can only be called if a pre-9.1 version of the GIN operator
      19             :  * class definition is present in the catalogs (probably as a consequence
      20             :  * of upgrade-in-place).  Cope.
      21             :  */
      22             : Datum
      23           0 : gin_extract_trgm(PG_FUNCTION_ARGS)
      24             : {
      25           0 :     if (PG_NARGS() == 3)
      26           0 :         return gin_extract_value_trgm(fcinfo);
      27           0 :     if (PG_NARGS() == 7)
      28           0 :         return gin_extract_query_trgm(fcinfo);
      29           0 :     elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
      30             :     PG_RETURN_NULL();
      31             : }
      32             : 
      33             : Datum
      34        4808 : gin_extract_value_trgm(PG_FUNCTION_ARGS)
      35             : {
      36        4808 :     text       *val = (text *) PG_GETARG_TEXT_PP(0);
      37        4808 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      38        4808 :     Datum      *entries = NULL;
      39             :     TRGM       *trg;
      40             :     int32       trglen;
      41             : 
      42        4808 :     *nentries = 0;
      43             : 
      44        4808 :     trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
      45        4808 :     trglen = ARRNELEM(trg);
      46             : 
      47        4808 :     if (trglen > 0)
      48             :     {
      49             :         trgm       *ptr;
      50             :         int32       i;
      51             : 
      52        4808 :         *nentries = trglen;
      53        4808 :         entries = (Datum *) palloc(sizeof(Datum) * trglen);
      54             : 
      55        4808 :         ptr = GETARR(trg);
      56       71262 :         for (i = 0; i < trglen; i++)
      57             :         {
      58       66454 :             int32       item = trgm2int(ptr);
      59             : 
      60       66454 :             entries[i] = Int32GetDatum(item);
      61       66454 :             ptr++;
      62             :         }
      63             :     }
      64             : 
      65        4808 :     PG_RETURN_POINTER(entries);
      66             : }
      67             : 
      68             : Datum
      69         340 : gin_extract_query_trgm(PG_FUNCTION_ARGS)
      70             : {
      71         340 :     text       *val = (text *) PG_GETARG_TEXT_PP(0);
      72         340 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      73         340 :     StrategyNumber strategy = PG_GETARG_UINT16(2);
      74             : 
      75             :     /* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
      76         340 :     Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
      77             : 
      78             :     /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
      79         340 :     int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
      80         340 :     Datum      *entries = NULL;
      81             :     TRGM       *trg;
      82             :     int32       trglen;
      83             :     trgm       *ptr;
      84             :     TrgmPackedGraph *graph;
      85             :     int32       i;
      86             : 
      87         340 :     switch (strategy)
      88             :     {
      89         160 :         case SimilarityStrategyNumber:
      90             :         case WordSimilarityStrategyNumber:
      91             :         case StrictWordSimilarityStrategyNumber:
      92             :         case EqualStrategyNumber:
      93         160 :             trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
      94         160 :             break;
      95          96 :         case ILikeStrategyNumber:
      96             : #ifndef IGNORECASE
      97             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
      98             : #endif
      99             :             /* FALL THRU */
     100             :         case LikeStrategyNumber:
     101             : 
     102             :             /*
     103             :              * For wildcard search we extract all the trigrams that every
     104             :              * potentially-matching string must include.
     105             :              */
     106          96 :             trg = generate_wildcard_trgm(VARDATA_ANY(val),
     107          96 :                                          VARSIZE_ANY_EXHDR(val));
     108          96 :             break;
     109          84 :         case RegExpICaseStrategyNumber:
     110             : #ifndef IGNORECASE
     111             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     112             : #endif
     113             :             /* FALL THRU */
     114             :         case RegExpStrategyNumber:
     115          84 :             trg = createTrgmNFA(val, PG_GET_COLLATION(),
     116             :                                 &graph, CurrentMemoryContext);
     117         152 :             if (trg && ARRNELEM(trg) > 0)
     118             :             {
     119             :                 /*
     120             :                  * Successful regex processing: store NFA-like graph as
     121             :                  * extra_data.  GIN API requires an array of nentries
     122             :                  * Pointers, but we just put the same value in each element.
     123             :                  */
     124          68 :                 trglen = ARRNELEM(trg);
     125          68 :                 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
     126        1696 :                 for (i = 0; i < trglen; i++)
     127        1628 :                     (*extra_data)[i] = (Pointer) graph;
     128             :             }
     129             :             else
     130             :             {
     131             :                 /* No result: have to do full index scan. */
     132          16 :                 *nentries = 0;
     133          16 :                 *searchMode = GIN_SEARCH_MODE_ALL;
     134          16 :                 PG_RETURN_POINTER(entries);
     135             :             }
     136          68 :             break;
     137           0 :         default:
     138           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     139             :             trg = NULL;         /* keep compiler quiet */
     140             :             break;
     141             :     }
     142             : 
     143         324 :     trglen = ARRNELEM(trg);
     144         324 :     *nentries = trglen;
     145             : 
     146         324 :     if (trglen > 0)
     147             :     {
     148         276 :         entries = (Datum *) palloc(sizeof(Datum) * trglen);
     149         276 :         ptr = GETARR(trg);
     150        3368 :         for (i = 0; i < trglen; i++)
     151             :         {
     152        3092 :             int32       item = trgm2int(ptr);
     153             : 
     154        3092 :             entries[i] = Int32GetDatum(item);
     155        3092 :             ptr++;
     156             :         }
     157             :     }
     158             : 
     159             :     /*
     160             :      * If no trigram was extracted then we have to scan the whole index.
     161             :      */
     162         324 :     if (trglen == 0)
     163          48 :         *searchMode = GIN_SEARCH_MODE_ALL;
     164             : 
     165         324 :     PG_RETURN_POINTER(entries);
     166             : }
     167             : 
     168             : Datum
     169          16 : gin_trgm_consistent(PG_FUNCTION_ARGS)
     170             : {
     171          16 :     bool       *check = (bool *) PG_GETARG_POINTER(0);
     172          16 :     StrategyNumber strategy = PG_GETARG_UINT16(1);
     173             : 
     174             :     /* text    *query = PG_GETARG_TEXT_PP(2); */
     175          16 :     int32       nkeys = PG_GETARG_INT32(3);
     176          16 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     177          16 :     bool       *recheck = (bool *) PG_GETARG_POINTER(5);
     178             :     bool        res;
     179             :     int32       i,
     180             :                 ntrue;
     181             :     double      nlimit;
     182             : 
     183             :     /* All cases served by this function are inexact */
     184          16 :     *recheck = true;
     185             : 
     186          16 :     switch (strategy)
     187             :     {
     188           0 :         case SimilarityStrategyNumber:
     189             :         case WordSimilarityStrategyNumber:
     190             :         case StrictWordSimilarityStrategyNumber:
     191           0 :             nlimit = index_strategy_get_limit(strategy);
     192             : 
     193             :             /* Count the matches */
     194           0 :             ntrue = 0;
     195           0 :             for (i = 0; i < nkeys; i++)
     196             :             {
     197           0 :                 if (check[i])
     198           0 :                     ntrue++;
     199             :             }
     200             : 
     201             :             /*--------------------
     202             :              * If DIVUNION is defined then similarity formula is:
     203             :              * c / (len1 + len2 - c)
     204             :              * where c is number of common trigrams and it stands as ntrue in
     205             :              * this code.  Here we don't know value of len2 but we can assume
     206             :              * that c (ntrue) is a lower bound of len2, so upper bound of
     207             :              * similarity is:
     208             :              * c / (len1 + c - c)  => c / len1
     209             :              * If DIVUNION is not defined then similarity formula is:
     210             :              * c / max(len1, len2)
     211             :              * And again, c (ntrue) is a lower bound of len2, but c <= len1
     212             :              * just by definition and, consequently, upper bound of
     213             :              * similarity is just c / len1.
     214             :              * So, independently of DIVUNION, the upper bound formula is the same.
     215             :              */
     216           0 :             res = (nkeys == 0) ? false :
     217           0 :                 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
     218           0 :             break;
     219          16 :         case ILikeStrategyNumber:
     220             : #ifndef IGNORECASE
     221             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
     222             : #endif
     223             :             /* FALL THRU */
     224             :         case LikeStrategyNumber:
     225             :         case EqualStrategyNumber:
     226             :             /* Check if all extracted trigrams are present. */
     227          16 :             res = true;
     228          32 :             for (i = 0; i < nkeys; i++)
     229             :             {
     230          16 :                 if (!check[i])
     231             :                 {
     232           0 :                     res = false;
     233           0 :                     break;
     234             :                 }
     235             :             }
     236          16 :             break;
     237           0 :         case RegExpICaseStrategyNumber:
     238             : #ifndef IGNORECASE
     239             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     240             : #endif
     241             :             /* FALL THRU */
     242             :         case RegExpStrategyNumber:
     243           0 :             if (nkeys < 1)
     244             :             {
     245             :                 /* Regex processing gave no result: do full index scan */
     246           0 :                 res = true;
     247             :             }
     248             :             else
     249           0 :                 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
     250             :                                          check);
     251           0 :             break;
     252           0 :         default:
     253           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     254             :             res = false;        /* keep compiler quiet */
     255             :             break;
     256             :     }
     257             : 
     258          16 :     PG_RETURN_BOOL(res);
     259             : }
     260             : 
     261             : /*
     262             :  * In all cases, GIN_TRUE is at least as favorable to inclusion as
     263             :  * GIN_MAYBE. If no better option is available, simply treat
     264             :  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
     265             :  * consistent function.
     266             :  */
     267             : Datum
     268       27324 : gin_trgm_triconsistent(PG_FUNCTION_ARGS)
     269             : {
     270       27324 :     GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
     271       27324 :     StrategyNumber strategy = PG_GETARG_UINT16(1);
     272             : 
     273             :     /* text    *query = PG_GETARG_TEXT_PP(2); */
     274       27324 :     int32       nkeys = PG_GETARG_INT32(3);
     275       27324 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     276       27324 :     GinTernaryValue res = GIN_MAYBE;
     277             :     int32       i,
     278             :                 ntrue;
     279             :     bool       *boolcheck;
     280             :     double      nlimit;
     281             : 
     282       27324 :     switch (strategy)
     283             :     {
     284       16382 :         case SimilarityStrategyNumber:
     285             :         case WordSimilarityStrategyNumber:
     286             :         case StrictWordSimilarityStrategyNumber:
     287       16382 :             nlimit = index_strategy_get_limit(strategy);
     288             : 
     289             :             /* Count the matches */
     290       16382 :             ntrue = 0;
     291      173818 :             for (i = 0; i < nkeys; i++)
     292             :             {
     293      157436 :                 if (check[i] != GIN_FALSE)
     294       66344 :                     ntrue++;
     295             :             }
     296             : 
     297             :             /*
     298             :              * See comment in gin_trgm_consistent() about the upper bound
     299             :              * formula.
     300             :              */
     301       32764 :             res = (nkeys == 0)
     302       16382 :                 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
     303             :                                ? GIN_MAYBE : GIN_FALSE);
     304       16382 :             break;
     305        8092 :         case ILikeStrategyNumber:
     306             : #ifndef IGNORECASE
     307             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
     308             : #endif
     309             :             /* FALL THRU */
     310             :         case LikeStrategyNumber:
     311             :         case EqualStrategyNumber:
     312             :             /* Check if all extracted trigrams are present. */
     313        8092 :             res = GIN_MAYBE;
     314       16364 :             for (i = 0; i < nkeys; i++)
     315             :             {
     316        8312 :                 if (check[i] == GIN_FALSE)
     317             :                 {
     318          40 :                     res = GIN_FALSE;
     319          40 :                     break;
     320             :                 }
     321             :             }
     322        8092 :             break;
     323        2850 :         case RegExpICaseStrategyNumber:
     324             : #ifndef IGNORECASE
     325             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     326             : #endif
     327             :             /* FALL THRU */
     328             :         case RegExpStrategyNumber:
     329        2850 :             if (nkeys < 1)
     330             :             {
     331             :                 /* Regex processing gave no result: do full index scan */
     332          80 :                 res = GIN_MAYBE;
     333             :             }
     334             :             else
     335             :             {
     336             :                 /*
     337             :                  * As trigramsMatchGraph implements a monotonic boolean
     338             :                  * function, promoting all GIN_MAYBE keys to GIN_TRUE will
     339             :                  * give a conservative result.
     340             :                  */
     341        2770 :                 boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
     342      638578 :                 for (i = 0; i < nkeys; i++)
     343      635808 :                     boolcheck[i] = (check[i] != GIN_FALSE);
     344        2770 :                 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
     345             :                                         boolcheck))
     346          12 :                     res = GIN_FALSE;
     347        2770 :                 pfree(boolcheck);
     348             :             }
     349        2850 :             break;
     350           0 :         default:
     351           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     352             :             res = GIN_FALSE;    /* keep compiler quiet */
     353             :             break;
     354             :     }
     355             : 
     356             :     /* All cases served by this function are inexact */
     357             :     Assert(res != GIN_TRUE);
     358       27324 :     PG_RETURN_GIN_TERNARY_VALUE(res);
     359             : }

Generated by: LCOV version 1.13