LCOV - code coverage report
Current view: top level - contrib/pg_trgm - trgm_gin.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 106 135 78.5 %
Date: 2024-11-21 08:14:44 Functions: 8 10 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * contrib/pg_trgm/trgm_gin.c
       3             :  */
       4             : #include "postgres.h"
       5             : 
       6             : #include "access/gin.h"
       7             : #include "access/stratnum.h"
       8             : #include "fmgr.h"
       9             : #include "trgm.h"
      10             : #include "varatt.h"
      11             : 
      12           0 : PG_FUNCTION_INFO_V1(gin_extract_trgm);
      13           8 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
      14           8 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
      15           8 : PG_FUNCTION_INFO_V1(gin_trgm_consistent);
      16           8 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
      17             : 
      18             : /*
      19             :  * This function can only be called if a pre-9.1 version of the GIN operator
      20             :  * class definition is present in the catalogs (probably as a consequence
      21             :  * of upgrade-in-place).  Cope.
      22             :  */
      23             : Datum
      24           0 : gin_extract_trgm(PG_FUNCTION_ARGS)
      25             : {
      26           0 :     if (PG_NARGS() == 3)
      27           0 :         return gin_extract_value_trgm(fcinfo);
      28           0 :     if (PG_NARGS() == 7)
      29           0 :         return gin_extract_query_trgm(fcinfo);
      30           0 :     elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
      31             :     PG_RETURN_NULL();
      32             : }
      33             : 
      34             : Datum
      35        4808 : gin_extract_value_trgm(PG_FUNCTION_ARGS)
      36             : {
      37        4808 :     text       *val = (text *) PG_GETARG_TEXT_PP(0);
      38        4808 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      39        4808 :     Datum      *entries = NULL;
      40             :     TRGM       *trg;
      41             :     int32       trglen;
      42             : 
      43        4808 :     *nentries = 0;
      44             : 
      45        4808 :     trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
      46        4808 :     trglen = ARRNELEM(trg);
      47             : 
      48        4808 :     if (trglen > 0)
      49             :     {
      50             :         trgm       *ptr;
      51             :         int32       i;
      52             : 
      53        4808 :         *nentries = trglen;
      54        4808 :         entries = (Datum *) palloc(sizeof(Datum) * trglen);
      55             : 
      56        4808 :         ptr = GETARR(trg);
      57       71262 :         for (i = 0; i < trglen; i++)
      58             :         {
      59       66454 :             int32       item = trgm2int(ptr);
      60             : 
      61       66454 :             entries[i] = Int32GetDatum(item);
      62       66454 :             ptr++;
      63             :         }
      64             :     }
      65             : 
      66        4808 :     PG_RETURN_POINTER(entries);
      67             : }
      68             : 
      69             : Datum
      70         344 : gin_extract_query_trgm(PG_FUNCTION_ARGS)
      71             : {
      72         344 :     text       *val = (text *) PG_GETARG_TEXT_PP(0);
      73         344 :     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
      74         344 :     StrategyNumber strategy = PG_GETARG_UINT16(2);
      75             : 
      76             :     /* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
      77         344 :     Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
      78             : 
      79             :     /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
      80         344 :     int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
      81         344 :     Datum      *entries = NULL;
      82             :     TRGM       *trg;
      83             :     int32       trglen;
      84             :     trgm       *ptr;
      85             :     TrgmPackedGraph *graph;
      86             :     int32       i;
      87             : 
      88         344 :     switch (strategy)
      89             :     {
      90         160 :         case SimilarityStrategyNumber:
      91             :         case WordSimilarityStrategyNumber:
      92             :         case StrictWordSimilarityStrategyNumber:
      93             :         case EqualStrategyNumber:
      94         160 :             trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
      95         160 :             break;
      96          96 :         case ILikeStrategyNumber:
      97             : #ifndef IGNORECASE
      98             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
      99             : #endif
     100             :             /* FALL THRU */
     101             :         case LikeStrategyNumber:
     102             : 
     103             :             /*
     104             :              * For wildcard search we extract all the trigrams that every
     105             :              * potentially-matching string must include.
     106             :              */
     107          96 :             trg = generate_wildcard_trgm(VARDATA_ANY(val),
     108          96 :                                          VARSIZE_ANY_EXHDR(val));
     109          96 :             break;
     110          88 :         case RegExpICaseStrategyNumber:
     111             : #ifndef IGNORECASE
     112             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     113             : #endif
     114             :             /* FALL THRU */
     115             :         case RegExpStrategyNumber:
     116          88 :             trg = createTrgmNFA(val, PG_GET_COLLATION(),
     117             :                                 &graph, CurrentMemoryContext);
     118          88 :             if (trg && ARRNELEM(trg) > 0)
     119             :             {
     120             :                 /*
     121             :                  * Successful regex processing: store NFA-like graph as
     122             :                  * extra_data.  GIN API requires an array of nentries
     123             :                  * Pointers, but we just put the same value in each element.
     124             :                  */
     125          68 :                 trglen = ARRNELEM(trg);
     126          68 :                 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
     127        1696 :                 for (i = 0; i < trglen; i++)
     128        1628 :                     (*extra_data)[i] = (Pointer) graph;
     129             :             }
     130             :             else
     131             :             {
     132             :                 /* No result: have to do full index scan. */
     133          20 :                 *nentries = 0;
     134          20 :                 *searchMode = GIN_SEARCH_MODE_ALL;
     135          20 :                 PG_RETURN_POINTER(entries);
     136             :             }
     137          68 :             break;
     138           0 :         default:
     139           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     140             :             trg = NULL;         /* keep compiler quiet */
     141             :             break;
     142             :     }
     143             : 
     144         324 :     trglen = ARRNELEM(trg);
     145         324 :     *nentries = trglen;
     146             : 
     147         324 :     if (trglen > 0)
     148             :     {
     149         276 :         entries = (Datum *) palloc(sizeof(Datum) * trglen);
     150         276 :         ptr = GETARR(trg);
     151        3368 :         for (i = 0; i < trglen; i++)
     152             :         {
     153        3092 :             int32       item = trgm2int(ptr);
     154             : 
     155        3092 :             entries[i] = Int32GetDatum(item);
     156        3092 :             ptr++;
     157             :         }
     158             :     }
     159             : 
     160             :     /*
     161             :      * If no trigram was extracted then we have to scan the whole index.
     162             :      */
     163         324 :     if (trglen == 0)
     164          48 :         *searchMode = GIN_SEARCH_MODE_ALL;
     165             : 
     166         324 :     PG_RETURN_POINTER(entries);
     167             : }
     168             : 
     169             : Datum
     170          16 : gin_trgm_consistent(PG_FUNCTION_ARGS)
     171             : {
     172          16 :     bool       *check = (bool *) PG_GETARG_POINTER(0);
     173          16 :     StrategyNumber strategy = PG_GETARG_UINT16(1);
     174             : 
     175             :     /* text    *query = PG_GETARG_TEXT_PP(2); */
     176          16 :     int32       nkeys = PG_GETARG_INT32(3);
     177          16 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     178          16 :     bool       *recheck = (bool *) PG_GETARG_POINTER(5);
     179             :     bool        res;
     180             :     int32       i,
     181             :                 ntrue;
     182             :     double      nlimit;
     183             : 
     184             :     /* All cases served by this function are inexact */
     185          16 :     *recheck = true;
     186             : 
     187          16 :     switch (strategy)
     188             :     {
     189           0 :         case SimilarityStrategyNumber:
     190             :         case WordSimilarityStrategyNumber:
     191             :         case StrictWordSimilarityStrategyNumber:
     192           0 :             nlimit = index_strategy_get_limit(strategy);
     193             : 
     194             :             /* Count the matches */
     195           0 :             ntrue = 0;
     196           0 :             for (i = 0; i < nkeys; i++)
     197             :             {
     198           0 :                 if (check[i])
     199           0 :                     ntrue++;
     200             :             }
     201             : 
     202             :             /*--------------------
     203             :              * If DIVUNION is defined then similarity formula is:
     204             :              * c / (len1 + len2 - c)
     205             :              * where c is number of common trigrams and it stands as ntrue in
     206             :              * this code.  Here we don't know value of len2 but we can assume
     207             :              * that c (ntrue) is a lower bound of len2, so upper bound of
     208             :              * similarity is:
     209             :              * c / (len1 + c - c)  => c / len1
     210             :              * If DIVUNION is not defined then similarity formula is:
     211             :              * c / max(len1, len2)
     212             :              * And again, c (ntrue) is a lower bound of len2, but c <= len1
     213             :              * just by definition and, consequently, upper bound of
     214             :              * similarity is just c / len1.
     215             :              * So, independently of DIVUNION, the upper bound formula is the same.
     216             :              */
     217           0 :             res = (nkeys == 0) ? false :
     218           0 :                 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
     219           0 :             break;
     220          16 :         case ILikeStrategyNumber:
     221             : #ifndef IGNORECASE
     222             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
     223             : #endif
     224             :             /* FALL THRU */
     225             :         case LikeStrategyNumber:
     226             :         case EqualStrategyNumber:
     227             :             /* Check if all extracted trigrams are present. */
     228          16 :             res = true;
     229          32 :             for (i = 0; i < nkeys; i++)
     230             :             {
     231          16 :                 if (!check[i])
     232             :                 {
     233           0 :                     res = false;
     234           0 :                     break;
     235             :                 }
     236             :             }
     237          16 :             break;
     238           0 :         case RegExpICaseStrategyNumber:
     239             : #ifndef IGNORECASE
     240             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     241             : #endif
     242             :             /* FALL THRU */
     243             :         case RegExpStrategyNumber:
     244           0 :             if (nkeys < 1)
     245             :             {
     246             :                 /* Regex processing gave no result: do full index scan */
     247           0 :                 res = true;
     248             :             }
     249             :             else
     250           0 :                 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
     251             :                                          check);
     252           0 :             break;
     253           0 :         default:
     254           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     255             :             res = false;        /* keep compiler quiet */
     256             :             break;
     257             :     }
     258             : 
     259          16 :     PG_RETURN_BOOL(res);
     260             : }
     261             : 
     262             : /*
     263             :  * In all cases, GIN_TRUE is at least as favorable to inclusion as
     264             :  * GIN_MAYBE. If no better option is available, simply treat
     265             :  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
     266             :  * consistent function.
     267             :  */
     268             : Datum
     269       28716 : gin_trgm_triconsistent(PG_FUNCTION_ARGS)
     270             : {
     271       28716 :     GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
     272       28716 :     StrategyNumber strategy = PG_GETARG_UINT16(1);
     273             : 
     274             :     /* text    *query = PG_GETARG_TEXT_PP(2); */
     275       28716 :     int32       nkeys = PG_GETARG_INT32(3);
     276       28716 :     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     277       28716 :     GinTernaryValue res = GIN_MAYBE;
     278             :     int32       i,
     279             :                 ntrue;
     280             :     bool       *boolcheck;
     281             :     double      nlimit;
     282             : 
     283       28716 :     switch (strategy)
     284             :     {
     285       16382 :         case SimilarityStrategyNumber:
     286             :         case WordSimilarityStrategyNumber:
     287             :         case StrictWordSimilarityStrategyNumber:
     288       16382 :             nlimit = index_strategy_get_limit(strategy);
     289             : 
     290             :             /* Count the matches */
     291       16382 :             ntrue = 0;
     292      173818 :             for (i = 0; i < nkeys; i++)
     293             :             {
     294      157436 :                 if (check[i] != GIN_FALSE)
     295       66344 :                     ntrue++;
     296             :             }
     297             : 
     298             :             /*
     299             :              * See comment in gin_trgm_consistent() about the upper bound
     300             :              * formula.
     301             :              */
     302       32764 :             res = (nkeys == 0)
     303       16382 :                 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
     304             :                                ? GIN_MAYBE : GIN_FALSE);
     305       16382 :             break;
     306        8092 :         case ILikeStrategyNumber:
     307             : #ifndef IGNORECASE
     308             :             elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
     309             : #endif
     310             :             /* FALL THRU */
     311             :         case LikeStrategyNumber:
     312             :         case EqualStrategyNumber:
     313             :             /* Check if all extracted trigrams are present. */
     314        8092 :             res = GIN_MAYBE;
     315       16364 :             for (i = 0; i < nkeys; i++)
     316             :             {
     317        8312 :                 if (check[i] == GIN_FALSE)
     318             :                 {
     319          40 :                     res = GIN_FALSE;
     320          40 :                     break;
     321             :                 }
     322             :             }
     323        8092 :             break;
     324        4242 :         case RegExpICaseStrategyNumber:
     325             : #ifndef IGNORECASE
     326             :             elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
     327             : #endif
     328             :             /* FALL THRU */
     329             :         case RegExpStrategyNumber:
     330        4242 :             if (nkeys < 1)
     331             :             {
     332             :                 /* Regex processing gave no result: do full index scan */
     333        1472 :                 res = GIN_MAYBE;
     334             :             }
     335             :             else
     336             :             {
     337             :                 /*
     338             :                  * As trigramsMatchGraph implements a monotonic boolean
     339             :                  * function, promoting all GIN_MAYBE keys to GIN_TRUE will
     340             :                  * give a conservative result.
     341             :                  */
     342        2770 :                 boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
     343      638578 :                 for (i = 0; i < nkeys; i++)
     344      635808 :                     boolcheck[i] = (check[i] != GIN_FALSE);
     345        2770 :                 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
     346             :                                         boolcheck))
     347          12 :                     res = GIN_FALSE;
     348        2770 :                 pfree(boolcheck);
     349             :             }
     350        4242 :             break;
     351           0 :         default:
     352           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     353             :             res = GIN_FALSE;    /* keep compiler quiet */
     354             :             break;
     355             :     }
     356             : 
     357             :     /* All cases served by this function are inexact */
     358             :     Assert(res != GIN_TRUE);
     359       28716 :     PG_RETURN_GIN_TERNARY_VALUE(res);
     360             : }

Generated by: LCOV version 1.14