LCOV - PostgreSQL 19devel - contrib/pg_trgm/trgm

LCOV - code coverage report

Current view:	top level - contrib/pg_trgm - trgm_op.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 19devel	Lines:	454	500	90.8 %
Date:	2025-08-17 01:17:32	Functions:	52	55	94.5 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*
       2             :  * contrib/pg_trgm/trgm_op.c
       3             :  */
       4             : #include "postgres.h"
       5             : 
       6             : #include <ctype.h>
       7             : 
       8             : #include "catalog/pg_collation_d.h"
       9             : #include "catalog/pg_type.h"
      10             : #include "common/int.h"
      11             : #include "lib/qunique.h"
      12             : #include "miscadmin.h"
      13             : #include "trgm.h"
      14             : #include "tsearch/ts_locale.h"
      15             : #include "utils/formatting.h"
      16             : #include "utils/guc.h"
      17             : #include "utils/lsyscache.h"
      18             : #include "utils/memutils.h"
      19             : #include "utils/pg_crc.h"
      20             : 
      21           6 : PG_MODULE_MAGIC_EXT(
      22             :                     .name = "pg_trgm",
      23             :                     .version = PG_VERSION
      24             : );
      25             : 
      26             : /* GUC variables */
      27             : double      similarity_threshold = 0.3f;
      28             : double      word_similarity_threshold = 0.6f;
      29             : double      strict_word_similarity_threshold = 0.5f;
      30             : 
      31           4 : PG_FUNCTION_INFO_V1(set_limit);
      32           4 : PG_FUNCTION_INFO_V1(show_limit);
      33           4 : PG_FUNCTION_INFO_V1(show_trgm);
      34           4 : PG_FUNCTION_INFO_V1(similarity);
      35           4 : PG_FUNCTION_INFO_V1(word_similarity);
      36           4 : PG_FUNCTION_INFO_V1(strict_word_similarity);
      37           4 : PG_FUNCTION_INFO_V1(similarity_dist);
      38           4 : PG_FUNCTION_INFO_V1(similarity_op);
      39           4 : PG_FUNCTION_INFO_V1(word_similarity_op);
      40           4 : PG_FUNCTION_INFO_V1(word_similarity_commutator_op);
      41           2 : PG_FUNCTION_INFO_V1(word_similarity_dist_op);
      42           4 : PG_FUNCTION_INFO_V1(word_similarity_dist_commutator_op);
      43           4 : PG_FUNCTION_INFO_V1(strict_word_similarity_op);
      44           4 : PG_FUNCTION_INFO_V1(strict_word_similarity_commutator_op);
      45           2 : PG_FUNCTION_INFO_V1(strict_word_similarity_dist_op);
      46           4 : PG_FUNCTION_INFO_V1(strict_word_similarity_dist_commutator_op);
      47             : 
      48             : static int  CMPTRGM_CHOOSE(const void *a, const void *b);
      49             : int         (*CMPTRGM) (const void *a, const void *b) = CMPTRGM_CHOOSE;
      50             : 
      51             : /* Trigram with position */
      52             : typedef struct
      53             : {
      54             :     trgm        trg;
      55             :     int         index;
      56             : } pos_trgm;
      57             : 
      58             : /* Trigram bound type */
      59             : typedef uint8 TrgmBound;
      60             : #define TRGM_BOUND_LEFT             0x01    /* trigram is left bound of word */
      61             : #define TRGM_BOUND_RIGHT            0x02    /* trigram is right bound of word */
      62             : 
      63             : /* Word similarity flags */
      64             : #define WORD_SIMILARITY_CHECK_ONLY  0x01    /* only check existence of similar
      65             :                                              * search pattern in text */
      66             : #define WORD_SIMILARITY_STRICT      0x02    /* force bounds of extent to match
      67             :                                              * word bounds */
      68             : 
      69             : /*
      70             :  * Module load callback
      71             :  */
      72             : void
      73           6 : _PG_init(void)
      74             : {
      75             :     /* Define custom GUC variables. */
      76           6 :     DefineCustomRealVariable("pg_trgm.similarity_threshold",
      77             :                              "Sets the threshold used by the % operator.",
      78             :                              "Valid range is 0.0 .. 1.0.",
      79             :                              &similarity_threshold,
      80             :                              0.3f,
      81             :                              0.0,
      82             :                              1.0,
      83             :                              PGC_USERSET,
      84             :                              0,
      85             :                              NULL,
      86             :                              NULL,
      87             :                              NULL);
      88           6 :     DefineCustomRealVariable("pg_trgm.word_similarity_threshold",
      89             :                              "Sets the threshold used by the <% operator.",
      90             :                              "Valid range is 0.0 .. 1.0.",
      91             :                              &word_similarity_threshold,
      92             :                              0.6f,
      93             :                              0.0,
      94             :                              1.0,
      95             :                              PGC_USERSET,
      96             :                              0,
      97             :                              NULL,
      98             :                              NULL,
      99             :                              NULL);
     100           6 :     DefineCustomRealVariable("pg_trgm.strict_word_similarity_threshold",
     101             :                              "Sets the threshold used by the <<% operator.",
     102             :                              "Valid range is 0.0 .. 1.0.",
     103             :                              &strict_word_similarity_threshold,
     104             :                              0.5f,
     105             :                              0.0,
     106             :                              1.0,
     107             :                              PGC_USERSET,
     108             :                              0,
     109             :                              NULL,
     110             :                              NULL,
     111             :                              NULL);
     112             : 
     113           6 :     MarkGUCPrefixReserved("pg_trgm");
     114           6 : }
     115             : 
     116             : #define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
     117             : 
     118             : /*
     119             :  * Functions for comparing two trgms while treating each char as "signed char" or
     120             :  * "unsigned char".
     121             :  */
     122             : static inline int
     123    14908162 : CMPTRGM_SIGNED(const void *a, const void *b)
     124             : {
     125             : #define CMPPCHAR_S(a,b,i)  CMPCHAR( *(((const signed char*)(a))+i), *(((const signed char*)(b))+i) )
     126             : 
     127    10735358 :     return CMPPCHAR_S(a, b, 0) ? CMPPCHAR_S(a, b, 0)
     128    32493982 :         : (CMPPCHAR_S(a, b, 1) ? CMPPCHAR_S(a, b, 1)
     129     6850462 :            : CMPPCHAR_S(a, b, 2));
     130             : }
     131             : 
     132             : static inline int
     133           0 : CMPTRGM_UNSIGNED(const void *a, const void *b)
     134             : {
     135             : #define CMPPCHAR_UNS(a,b,i)  CMPCHAR( *(((const unsigned char*)(a))+i), *(((const unsigned char*)(b))+i) )
     136             : 
     137           0 :     return CMPPCHAR_UNS(a, b, 0) ? CMPPCHAR_UNS(a, b, 0)
     138           0 :         : (CMPPCHAR_UNS(a, b, 1) ? CMPPCHAR_UNS(a, b, 1)
     139           0 :            : CMPPCHAR_UNS(a, b, 2));
     140             : }
     141             : 
     142             : /*
     143             :  * This gets called on the first call. It replaces the function pointer so
     144             :  * that subsequent calls are routed directly to the chosen implementation.
     145             :  */
     146             : static int
     147           6 : CMPTRGM_CHOOSE(const void *a, const void *b)
     148             : {
     149           6 :     if (GetDefaultCharSignedness())
     150           6 :         CMPTRGM = CMPTRGM_SIGNED;
     151             :     else
     152           0 :         CMPTRGM = CMPTRGM_UNSIGNED;
     153             : 
     154           6 :     return CMPTRGM(a, b);
     155             : }
     156             : 
     157             : /*
     158             :  * Deprecated function.
     159             :  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
     160             :  */
     161             : Datum
     162           4 : set_limit(PG_FUNCTION_ARGS)
     163             : {
     164           4 :     float4      nlimit = PG_GETARG_FLOAT4(0);
     165             :     char       *nlimit_str;
     166             :     Oid         func_out_oid;
     167             :     bool        is_varlena;
     168             : 
     169           4 :     getTypeOutputInfo(FLOAT4OID, &func_out_oid, &is_varlena);
     170             : 
     171           4 :     nlimit_str = OidOutputFunctionCall(func_out_oid, Float4GetDatum(nlimit));
     172             : 
     173           4 :     SetConfigOption("pg_trgm.similarity_threshold", nlimit_str,
     174             :                     PGC_USERSET, PGC_S_SESSION);
     175             : 
     176           4 :     PG_RETURN_FLOAT4(similarity_threshold);
     177             : }
     178             : 
     179             : 
     180             : /*
     181             :  * Get similarity threshold for given index scan strategy number.
     182             :  */
     183             : double
     184       86366 : index_strategy_get_limit(StrategyNumber strategy)
     185             : {
     186       86366 :     switch (strategy)
     187             :     {
     188       64774 :         case SimilarityStrategyNumber:
     189       64774 :             return similarity_threshold;
     190        9636 :         case WordSimilarityStrategyNumber:
     191        9636 :             return word_similarity_threshold;
     192       11956 :         case StrictWordSimilarityStrategyNumber:
     193       11956 :             return strict_word_similarity_threshold;
     194           0 :         default:
     195           0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
     196             :             break;
     197             :     }
     198             : 
     199             :     return 0.0;                 /* keep compiler quiet */
     200             : }
     201             : 
     202             : /*
     203             :  * Deprecated function.
     204             :  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
     205             :  */
     206             : Datum
     207       40000 : show_limit(PG_FUNCTION_ARGS)
     208             : {
     209       40000 :     PG_RETURN_FLOAT4(similarity_threshold);
     210             : }
     211             : 
     212             : static int
     213     6374440 : comp_trgm(const void *a, const void *b)
     214             : {
     215     6374440 :     return CMPTRGM(a, b);
     216             : }
     217             : 
     218             : /*
     219             :  * Finds first word in string, returns pointer to the word,
     220             :  * endword points to the character after word
     221             :  */
     222             : static char *
     223      482820 : find_word(char *str, int lenstr, char **endword, int *charlen)
     224             : {
     225      482820 :     char       *beginword = str;
     226             : 
     227      510146 :     while (beginword - str < lenstr && !ISWORDCHR(beginword))
     228       27326 :         beginword += pg_mblen(beginword);
     229             : 
     230      482820 :     if (beginword - str >= lenstr)
     231      228160 :         return NULL;
     232             : 
     233      254660 :     *endword = beginword;
     234      254660 :     *charlen = 0;
     235     2199332 :     while (*endword - str < lenstr && ISWORDCHR(*endword))
     236             :     {
     237     1944672 :         *endword += pg_mblen(*endword);
     238     1944672 :         (*charlen)++;
     239             :     }
     240             : 
     241      254660 :     return beginword;
     242             : }
     243             : 
     244             : /*
     245             :  * Reduce a trigram (three possibly multi-byte characters) to a trgm,
     246             :  * which is always exactly three bytes.  If we have three single-byte
     247             :  * characters, we just use them as-is; otherwise we form a hash value.
     248             :  */
     249             : void
     250        2918 : compact_trigram(trgm *tptr, char *str, int bytelen)
     251             : {
     252        2918 :     if (bytelen == 3)
     253             :     {
     254        2918 :         CPTRGM(tptr, str);
     255             :     }
     256             :     else
     257             :     {
     258             :         pg_crc32    crc;
     259             : 
     260           0 :         INIT_LEGACY_CRC32(crc);
     261           0 :         COMP_LEGACY_CRC32(crc, str, bytelen);
     262           0 :         FIN_LEGACY_CRC32(crc);
     263             : 
     264             :         /*
     265             :          * use only 3 upper bytes from crc; hopefully that is good enough hashing
     266             :          */
     267           0 :         CPTRGM(tptr, &crc);
     268             :     }
     269        2918 : }
     270             : 
     271             : /*
     272             :  * Adds trigrams from words (already padded).
     273             :  */
     274             : static trgm *
     275      254788 : make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
     276             : {
     277      254788 :     char       *ptr = str;
     278             : 
     279      254788 :     if (charlen < 3)
     280          54 :         return tptr;
     281             : 
     282      254734 :     if (bytelen > charlen)
     283             :     {
     284             :         /* Find multibyte character boundaries and apply compact_trigram */
     285           0 :         int         lenfirst = pg_mblen(str),
     286           0 :                     lenmiddle = pg_mblen(str + lenfirst),
     287           0 :                     lenlast = pg_mblen(str + lenfirst + lenmiddle);
     288             : 
     289           0 :         while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
     290             :         {
     291           0 :             compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);
     292             : 
     293           0 :             ptr += lenfirst;
     294           0 :             tptr++;
     295             : 
     296           0 :             lenfirst = lenmiddle;
     297           0 :             lenmiddle = lenlast;
     298           0 :             lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
     299             :         }
     300             :     }
     301             :     else
     302             :     {
     303             :         /* Fast path when there are no multibyte characters */
     304             :         Assert(bytelen == charlen);
     305             : 
     306     2454248 :         while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ )
     307             :         {
     308     2199514 :             CPTRGM(tptr, ptr);
     309     2199514 :             ptr++;
     310     2199514 :             tptr++;
     311             :         }
     312             :     }
     313             : 
     314      254734 :     return tptr;
     315             : }
     316             : 
     317             : /*
     318             :  * Make array of trigrams without sorting and removing duplicate items.
     319             :  *
     320             :  * trg: where to return the array of trigrams.
     321             :  * str: source string, of length slen bytes.
     322             :  * bounds: where to return bounds of trigrams (if needed).
     323             :  *
     324             :  * Returns length of the generated array.
     325             :  */
     326             : static int
     327      228162 : generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
     328             : {
     329             :     trgm       *tptr;
     330             :     char       *buf;
     331             :     int         charlen,
     332             :                 bytelen;
     333             :     char       *bword,
     334             :                *eword;
     335             : 
     336      228162 :     if (slen + LPADDING + RPADDING < 3 || slen == 0)
     337           2 :         return 0;
     338             : 
     339      228160 :     tptr = trg;
     340             : 
     341             :     /* Allocate a buffer for case-folded, blank-padded words */
     342      228160 :     buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4);
     343             : 
     344             :     if (LPADDING > 0)
     345             :     {
     346      228160 :         *buf = ' ';
     347             :         if (LPADDING > 1)
     348      228160 :             *(buf + 1) = ' ';
     349             :     }
     350             : 
     351      228160 :     eword = str;
     352      482820 :     while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
     353             :     {
     354             : #ifdef IGNORECASE
     355      254660 :         bword = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
     356      254660 :         bytelen = strlen(bword);
     357             : #else
     358             :         bytelen = eword - bword;
     359             : #endif
     360             : 
     361      254660 :         memcpy(buf + LPADDING, bword, bytelen);
     362             : 
     363             : #ifdef IGNORECASE
     364      254660 :         pfree(bword);
     365             : #endif
     366             : 
     367      254660 :         buf[LPADDING + bytelen] = ' ';
     368      254660 :         buf[LPADDING + bytelen + 1] = ' ';
     369             : 
     370             :         /* Calculate trigrams marking their bounds if needed */
     371      254660 :         if (bounds)
     372       24794 :             bounds[tptr - trg] |= TRGM_BOUND_LEFT;
     373      254660 :         tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
     374             :                              charlen + LPADDING + RPADDING);
     375      254660 :         if (bounds)
     376       24794 :             bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
     377             :     }
     378             : 
     379      228160 :     pfree(buf);
     380             : 
     381      228160 :     return tptr - trg;
     382             : }
     383             : 
     384             : /*
     385             :  * Guard against possible overflow in the palloc requests below.  (We
     386             :  * don't worry about the additive constants, since palloc can detect
     387             :  * requests that are a little above MaxAllocSize --- we just need to
     388             :  * prevent integer overflow in the multiplications.)
     389             :  */
     390             : static void
     391      204020 : protect_out_of_mem(int slen)
     392             : {
     393      204020 :     if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||
     394      204020 :         (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length()))
     395           0 :         ereport(ERROR,
     396             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     397             :                  errmsg("out of memory")));
     398      204020 : }
     399             : 
     400             : /*
     401             :  * Make array of trigrams with sorting and removing duplicate items.
     402             :  *
     403             :  * str: source string, of length slen bytes.
     404             :  *
     405             :  * Returns the sorted array of unique trigrams.
     406             :  */
     407             : TRGM *
     408      179658 : generate_trgm(char *str, int slen)
     409             : {
     410             :     TRGM       *trg;
     411             :     int         len;
     412             : 
     413      179658 :     protect_out_of_mem(slen);
     414             : 
     415      179658 :     trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
     416      179658 :     trg->flag = ARRKEY;
     417             : 
     418      179658 :     len = generate_trgm_only(GETARR(trg), str, slen, NULL);
     419      179658 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     420             : 
     421      179658 :     if (len == 0)
     422           8 :         return trg;
     423             : 
     424             :     /*
     425             :      * Make trigrams unique.
     426             :      */
     427      179650 :     if (len > 1)
     428             :     {
     429      179650 :         qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
     430      179650 :         len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
     431             :     }
     432             : 
     433      179650 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     434             : 
     435      179650 :     return trg;
     436             : }
     437             : 
     438             : /*
     439             :  * Make array of positional trigrams from two trigram arrays trg1 and trg2.
     440             :  *
     441             :  * trg1: trigram array of search pattern, of length len1. trg1 is the required
     442             :  *       word, whose positions don't matter and are replaced with -1.
     443             :  * trg2: trigram array of text, of length len2. trg2 is the haystack in which
     444             :  *       we search, so its positions have to be stored.
     445             :  *
     446             :  * Returns concatenated trigram array.
     447             :  */
     448             : static pos_trgm *
     449       24252 : make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
     450             : {
     451             :     pos_trgm   *result;
     452             :     int         i,
     453       24252 :                 len = len1 + len2;
     454             : 
     455       24252 :     result = (pos_trgm *) palloc(sizeof(pos_trgm) * len);
     456             : 
     457      241728 :     for (i = 0; i < len1; i++)
     458             :     {
     459      217476 :         memcpy(&result[i].trg, &trg1[i], sizeof(trgm));
     460      217476 :         result[i].index = -1;
     461             :     }
     462             : 
     463      384408 :     for (i = 0; i < len2; i++)
     464             :     {
     465      360156 :         memcpy(&result[i + len1].trg, &trg2[i], sizeof(trgm));
     466      360156 :         result[i + len1].index = i;
     467             :     }
     468             : 
     469       24252 :     return result;
     470             : }
     471             : 
     472             : /*
     473             :  * Compare position trigrams: compare trigrams first and position second.
     474             :  */
     475             : static int
     476     2615290 : comp_ptrgm(const void *v1, const void *v2)
     477             : {
     478     2615290 :     const pos_trgm *p1 = (const pos_trgm *) v1;
     479     2615290 :     const pos_trgm *p2 = (const pos_trgm *) v2;
     480             :     int         cmp;
     481             : 
     482     2615290 :     cmp = CMPTRGM(p1->trg, p2->trg);
     483     2615290 :     if (cmp != 0)
     484     2535874 :         return cmp;
     485             : 
     486       79416 :     return pg_cmp_s32(p1->index, p2->index);
     487             : }
     488             : 
     489             : /*
     490             :  * Iterative search function which calculates maximum similarity with word in
     491             :  * the string. Maximum similarity is calculated only if the flag
     492             :  * WORD_SIMILARITY_CHECK_ONLY isn't set.
     493             :  *
     494             :  * trg2indexes: array which stores indexes of the array "found".
     495             :  * found: array which stores true or false values.
     496             :  * ulen1: count of unique trigrams of array "trg1".
     497             :  * len2: length of array "trg2" and array "trg2indexes".
     498             :  * len: length of the array "found".
     499             :  * flags: set of boolean flags parameterizing similarity calculation.
     500             :  * bounds: whether each trigram is left/right bound of word.
     501             :  *
     502             :  * Returns word similarity.
     503             :  */
     504             : static float4
     505       24252 : iterate_word_similarity(int *trg2indexes,
     506             :                         bool *found,
     507             :                         int ulen1,
     508             :                         int len2,
     509             :                         int len,
     510             :                         uint8 flags,
     511             :                         TrgmBound *bounds)
     512             : {
     513             :     int        *lastpos,
     514             :                 i,
     515       24252 :                 ulen2 = 0,
     516       24252 :                 count = 0,
     517       24252 :                 upper = -1,
     518             :                 lower;
     519             :     float4      smlr_cur,
     520       24252 :                 smlr_max = 0.0f;
     521             :     double      threshold;
     522             : 
     523             :     Assert(bounds || !(flags & WORD_SIMILARITY_STRICT));
     524             : 
     525             :     /* Select appropriate threshold */
     526       48504 :     threshold = (flags & WORD_SIMILARITY_STRICT) ?
     527       24252 :         strict_word_similarity_threshold :
     528             :         word_similarity_threshold;
     529             : 
     530             :     /*
     531             :      * Consider first trigram as initial lower bound for strict word
     532             :      * similarity, or initialize it later with first trigram present for plain
     533             :      * word similarity.
     534             :      */
     535       24252 :     lower = (flags & WORD_SIMILARITY_STRICT) ? 0 : -1;
     536             : 
     537             :     /* Memorise last position of each trigram */
     538       24252 :     lastpos = (int *) palloc(sizeof(int) * len);
     539       24252 :     memset(lastpos, -1, sizeof(int) * len);
     540             : 
     541      367268 :     for (i = 0; i < len2; i++)
     542             :     {
     543             :         int         trgindex;
     544             : 
     545      346584 :         CHECK_FOR_INTERRUPTS();
     546             : 
     547             :         /* Get index of next trigram */
     548      346584 :         trgindex = trg2indexes[i];
     549             : 
     550             :         /* Update last position of this trigram */
     551      346584 :         if (lower >= 0 || found[trgindex])
     552             :         {
     553      271568 :             if (lastpos[trgindex] < 0)
     554             :             {
     555      267860 :                 ulen2++;
     556      267860 :                 if (found[trgindex])
     557       61512 :                     count++;
     558             :             }
     559      271568 :             lastpos[trgindex] = i;
     560             :         }
     561             : 
     562             :         /*
     563             :          * Adjust upper bound if trigram is upper bound of word for strict
     564             :          * word similarity, or if trigram is present in required substring for
     565             :          * plain word similarity
     566             :          */
     567      500668 :         if ((flags & WORD_SIMILARITY_STRICT) ? (bounds[i] & TRGM_BOUND_RIGHT)
     568      154084 :             : found[trgindex])
     569             :         {
     570             :             int         prev_lower,
     571             :                         tmp_ulen2,
     572             :                         tmp_lower,
     573             :                         tmp_count;
     574             : 
     575       51272 :             upper = i;
     576       51272 :             if (lower == -1)
     577             :             {
     578        9390 :                 lower = i;
     579        9390 :                 ulen2 = 1;
     580             :             }
     581             : 
     582       51272 :             smlr_cur = CALCSML(count, ulen1, ulen2);
     583             : 
     584             :             /* Also try to adjust lower bound for greater similarity */
     585       51272 :             tmp_count = count;
     586       51272 :             tmp_ulen2 = ulen2;
     587       51272 :             prev_lower = lower;
     588      417190 :             for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
     589             :             {
     590             :                 float       smlr_tmp;
     591             :                 int         tmp_trgindex;
     592             : 
     593             :                 /*
     594             :                  * Adjust lower bound only if trigram is lower bound of word
     595             :                  * for strict word similarity, or consider every trigram as
     596             :                  * lower bound for plain word similarity.
     597             :                  */
     598      369486 :                 if (!(flags & WORD_SIMILARITY_STRICT)
     599      290314 :                     || (bounds[tmp_lower] & TRGM_BOUND_LEFT))
     600             :                 {
     601      119432 :                     smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
     602      119432 :                     if (smlr_tmp > smlr_cur)
     603             :                     {
     604        7028 :                         smlr_cur = smlr_tmp;
     605        7028 :                         ulen2 = tmp_ulen2;
     606        7028 :                         lower = tmp_lower;
     607        7028 :                         count = tmp_count;
     608             :                     }
     609             : 
     610             :                     /*
     611             :                      * If we only check that word similarity is greater than
     612             :                      * threshold we do not need to calculate a maximum
     613             :                      * similarity.
     614             :                      */
     615      119432 :                     if ((flags & WORD_SIMILARITY_CHECK_ONLY)
     616       74228 :                         && smlr_cur >= threshold)
     617        3568 :                         break;
     618             :                 }
     619             : 
     620      365918 :                 tmp_trgindex = trg2indexes[tmp_lower];
     621      365918 :                 if (lastpos[tmp_trgindex] == tmp_lower)
     622             :                 {
     623      361394 :                     tmp_ulen2--;
     624      361394 :                     if (found[tmp_trgindex])
     625       93168 :                         tmp_count--;
     626             :                 }
     627             :             }
     628             : 
     629       51272 :             smlr_max = Max(smlr_max, smlr_cur);
     630             : 
     631             :             /*
     632             :              * if we only check that word similarity is greater than threshold
     633             :              * we do not need to calculate a maximum similarity.
     634             :              */
     635       51272 :             if ((flags & WORD_SIMILARITY_CHECK_ONLY) && smlr_max >= threshold)
     636        3568 :                 break;
     637             : 
     638       81220 :             for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
     639             :             {
     640             :                 int         tmp_trgindex;
     641             : 
     642       33516 :                 tmp_trgindex = trg2indexes[tmp_lower];
     643       33516 :                 if (lastpos[tmp_trgindex] == tmp_lower)
     644       32016 :                     lastpos[tmp_trgindex] = -1;
     645             :             }
     646             :         }
     647             :     }
     648             : 
     649       24252 :     pfree(lastpos);
     650             : 
     651       24252 :     return smlr_max;
     652             : }
     653             : 
     654             : /*
     655             :  * Calculate word similarity.
     656             :  * This function prepare two arrays: "trg2indexes" and "found". Then this arrays
     657             :  * are used to calculate word similarity using iterate_word_similarity().
     658             :  *
     659             :  * "trg2indexes" is array which stores indexes of the array "found".
     660             :  * In other words:
     661             :  * trg2indexes[j] = i;
     662             :  * found[i] = true (or false);
     663             :  * If found[i] == true then there is trigram trg2[j] in array "trg1".
     664             :  * If found[i] == false then there is not trigram trg2[j] in array "trg1".
     665             :  *
     666             :  * str1: search pattern string, of length slen1 bytes.
     667             :  * str2: text in which we are looking for a word, of length slen2 bytes.
     668             :  * flags: set of boolean flags parameterizing similarity calculation.
     669             :  *
     670             :  * Returns word similarity.
     671             :  */
     672             : static float4
     673       24252 : calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
     674             :                      uint8 flags)
     675             : {
     676             :     bool       *found;
     677             :     pos_trgm   *ptrg;
     678             :     trgm       *trg1;
     679             :     trgm       *trg2;
     680             :     int         len1,
     681             :                 len2,
     682             :                 len,
     683             :                 i,
     684             :                 j,
     685             :                 ulen1;
     686             :     int        *trg2indexes;
     687             :     float4      result;
     688             :     TrgmBound  *bounds;
     689             : 
     690       24252 :     protect_out_of_mem(slen1 + slen2);
     691             : 
     692             :     /* Make positional trigrams */
     693       24252 :     trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);
     694       24252 :     trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);
     695       24252 :     if (flags & WORD_SIMILARITY_STRICT)
     696       13324 :         bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);
     697             :     else
     698       10928 :         bounds = NULL;
     699             : 
     700       24252 :     len1 = generate_trgm_only(trg1, str1, slen1, NULL);
     701       24252 :     len2 = generate_trgm_only(trg2, str2, slen2, bounds);
     702             : 
     703       24252 :     ptrg = make_positional_trgm(trg1, len1, trg2, len2);
     704       24252 :     len = len1 + len2;
     705       24252 :     qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm);
     706             : 
     707       24252 :     pfree(trg1);
     708       24252 :     pfree(trg2);
     709             : 
     710             :     /*
     711             :      * Merge positional trigrams array: enumerate each trigram and find its
     712             :      * presence in required word.
     713             :      */
     714       24252 :     trg2indexes = (int *) palloc(sizeof(int) * len2);
     715       24252 :     found = (bool *) palloc0(sizeof(bool) * len);
     716             : 
     717       24252 :     ulen1 = 0;
     718       24252 :     j = 0;
     719      601884 :     for (i = 0; i < len; i++)
     720             :     {
     721      577632 :         if (i > 0)
     722             :         {
     723      553380 :             int         cmp = CMPTRGM(ptrg[i - 1].trg, ptrg[i].trg);
     724             : 
     725      553380 :             if (cmp != 0)
     726             :             {
     727      484976 :                 if (found[j])
     728      202276 :                     ulen1++;
     729      484976 :                 j++;
     730             :             }
     731             :         }
     732             : 
     733      577632 :         if (ptrg[i].index >= 0)
     734             :         {
     735      360156 :             trg2indexes[ptrg[i].index] = j;
     736             :         }
     737             :         else
     738             :         {
     739      217476 :             found[j] = true;
     740             :         }
     741             :     }
     742       24252 :     if (found[j])
     743       15200 :         ulen1++;
     744             : 
     745             :     /* Run iterative procedure to find maximum similarity with word */
     746       24252 :     result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len,
     747             :                                      flags, bounds);
     748             : 
     749       24252 :     pfree(trg2indexes);
     750       24252 :     pfree(found);
     751       24252 :     pfree(ptrg);
     752             : 
     753       24252 :     return result;
     754             : }
     755             : 
     756             : 
     757             : /*
     758             :  * Extract the next non-wildcard part of a search string, i.e. a word bounded
     759             :  * by '_' or '%' meta-characters, non-word characters or string end.
     760             :  *
     761             :  * str: source string, of length lenstr bytes (need not be null-terminated)
     762             :  * buf: where to return the substring (must be long enough)
     763             :  * *bytelen: receives byte length of the found substring
     764             :  * *charlen: receives character length of the found substring
     765             :  *
     766             :  * Returns pointer to end+1 of the found substring in the source string.
     767             :  * Returns NULL if no word found (in which case buf, bytelen, charlen not set)
     768             :  *
     769             :  * If the found word is bounded by non-word characters or string boundaries
     770             :  * then this function will include corresponding padding spaces into buf.
     771             :  */
     772             : static const char *
     773         238 : get_wildcard_part(const char *str, int lenstr,
     774             :                   char *buf, int *bytelen, int *charlen)
     775             : {
     776         238 :     const char *beginword = str;
     777             :     const char *endword;
     778         238 :     char       *s = buf;
     779         238 :     bool        in_leading_wildcard_meta = false;
     780         238 :     bool        in_trailing_wildcard_meta = false;
     781         238 :     bool        in_escape = false;
     782             :     int         clen;
     783             : 
     784             :     /*
     785             :      * Find the first word character, remembering whether preceding character
     786             :      * was wildcard meta-character.  Note that the in_escape state persists
     787             :      * from this loop to the next one, since we may exit at a word character
     788             :      * that is in_escape.
     789             :      */
     790         482 :     while (beginword - str < lenstr)
     791             :     {
     792         372 :         if (in_escape)
     793             :         {
     794           6 :             if (ISWORDCHR(beginword))
     795           6 :                 break;
     796           0 :             in_escape = false;
     797           0 :             in_leading_wildcard_meta = false;
     798             :         }
     799             :         else
     800             :         {
     801         366 :             if (ISESCAPECHAR(beginword))
     802           6 :                 in_escape = true;
     803         360 :             else if (ISWILDCARDCHAR(beginword))
     804         208 :                 in_leading_wildcard_meta = true;
     805         152 :             else if (ISWORDCHR(beginword))
     806         122 :                 break;
     807             :             else
     808          30 :                 in_leading_wildcard_meta = false;
     809             :         }
     810         244 :         beginword += pg_mblen(beginword);
     811             :     }
     812             : 
     813             :     /*
     814             :      * Handle string end.
     815             :      */
     816         238 :     if (beginword - str >= lenstr)
     817         110 :         return NULL;
     818             : 
     819             :     /*
     820             :      * Add left padding spaces if preceding character wasn't wildcard
     821             :      * meta-character.
     822             :      */
     823         128 :     *charlen = 0;
     824         128 :     if (!in_leading_wildcard_meta)
     825             :     {
     826             :         if (LPADDING > 0)
     827             :         {
     828          30 :             *s++ = ' ';
     829          30 :             (*charlen)++;
     830             :             if (LPADDING > 1)
     831             :             {
     832          30 :                 *s++ = ' ';
     833          30 :                 (*charlen)++;
     834             :             }
     835             :         }
     836             :     }
     837             : 
     838             :     /*
     839             :      * Copy data into buf until wildcard meta-character, non-word character or
     840             :      * string boundary.  Strip escapes during copy.
     841             :      */
     842         128 :     endword = beginword;
     843         488 :     while (endword - str < lenstr)
     844             :     {
     845         488 :         clen = pg_mblen(endword);
     846         488 :         if (in_escape)
     847             :         {
     848           6 :             if (ISWORDCHR(endword))
     849             :             {
     850           6 :                 memcpy(s, endword, clen);
     851           6 :                 (*charlen)++;
     852           6 :                 s += clen;
     853             :             }
     854             :             else
     855             :             {
     856             :                 /*
     857             :                  * Back up endword to the escape character when stopping at an
     858             :                  * escaped char, so that subsequent get_wildcard_part will
     859             :                  * restart from the escape character.  We assume here that
     860             :                  * escape chars are single-byte.
     861             :                  */
     862           0 :                 endword--;
     863           0 :                 break;
     864             :             }
     865           6 :             in_escape = false;
     866             :         }
     867             :         else
     868             :         {
     869         482 :             if (ISESCAPECHAR(endword))
     870           0 :                 in_escape = true;
     871         482 :             else if (ISWILDCARDCHAR(endword))
     872             :             {
     873         110 :                 in_trailing_wildcard_meta = true;
     874         110 :                 break;
     875             :             }
     876         372 :             else if (ISWORDCHR(endword))
     877             :             {
     878         354 :                 memcpy(s, endword, clen);
     879         354 :                 (*charlen)++;
     880         354 :                 s += clen;
     881             :             }
     882             :             else
     883          18 :                 break;
     884             :         }
     885         360 :         endword += clen;
     886             :     }
     887             : 
     888             :     /*
     889             :      * Add right padding spaces if next character isn't wildcard
     890             :      * meta-character.
     891             :      */
     892         128 :     if (!in_trailing_wildcard_meta)
     893             :     {
     894             :         if (RPADDING > 0)
     895             :         {
     896          18 :             *s++ = ' ';
     897          18 :             (*charlen)++;
     898             :             if (RPADDING > 1)
     899             :             {
     900             :                 *s++ = ' ';
     901             :                 (*charlen)++;
     902             :             }
     903             :         }
     904             :     }
     905             : 
     906         128 :     *bytelen = s - buf;
     907         128 :     return endword;
     908             : }
     909             : 
     910             : /*
     911             :  * Generates trigrams for wildcard search string.
     912             :  *
     913             :  * Returns array of trigrams that must occur in any string that matches the
     914             :  * wildcard string.  For example, given pattern "a%bcd%" the trigrams
     915             :  * " a", "bcd" would be extracted.
     916             :  */
     917             : TRGM *
     918         110 : generate_wildcard_trgm(const char *str, int slen)
     919             : {
     920             :     TRGM       *trg;
     921             :     char       *buf,
     922             :                *buf2;
     923             :     trgm       *tptr;
     924             :     int         len,
     925             :                 charlen,
     926             :                 bytelen;
     927             :     const char *eword;
     928             : 
     929         110 :     protect_out_of_mem(slen);
     930             : 
     931         110 :     trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
     932         110 :     trg->flag = ARRKEY;
     933         110 :     SET_VARSIZE(trg, TRGMHDRSIZE);
     934             : 
     935         110 :     if (slen + LPADDING + RPADDING < 3 || slen == 0)
     936           0 :         return trg;
     937             : 
     938         110 :     tptr = GETARR(trg);
     939             : 
     940             :     /* Allocate a buffer for blank-padded, but not yet case-folded, words */
     941         110 :     buf = palloc(sizeof(char) * (slen + 4));
     942             : 
     943             :     /*
     944             :      * Extract trigrams from each substring extracted by get_wildcard_part.
     945             :      */
     946         110 :     eword = str;
     947         238 :     while ((eword = get_wildcard_part(eword, slen - (eword - str),
     948         238 :                                       buf, &bytelen, &charlen)) != NULL)
     949             :     {
     950             : #ifdef IGNORECASE
     951         128 :         buf2 = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);
     952         128 :         bytelen = strlen(buf2);
     953             : #else
     954             :         buf2 = buf;
     955             : #endif
     956             : 
     957             :         /*
     958             :          * count trigrams
     959             :          */
     960         128 :         tptr = make_trigrams(tptr, buf2, bytelen, charlen);
     961             : 
     962             : #ifdef IGNORECASE
     963         128 :         pfree(buf2);
     964             : #endif
     965             :     }
     966             : 
     967         110 :     pfree(buf);
     968             : 
     969         110 :     if ((len = tptr - GETARR(trg)) == 0)
     970          48 :         return trg;
     971             : 
     972             :     /*
     973             :      * Make trigrams unique.
     974             :      */
     975          62 :     if (len > 1)
     976             :     {
     977          34 :         qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
     978          34 :         len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
     979             :     }
     980             : 
     981          62 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     982             : 
     983          62 :     return trg;
     984             : }
     985             : 
     986             : uint32
     987       69546 : trgm2int(trgm *ptr)
     988             : {
     989       69546 :     uint32      val = 0;
     990             : 
     991       69546 :     val |= *(((unsigned char *) ptr));
     992       69546 :     val <<= 8;
     993       69546 :     val |= *(((unsigned char *) ptr) + 1);
     994       69546 :     val <<= 8;
     995       69546 :     val |= *(((unsigned char *) ptr) + 2);
     996             : 
     997       69546 :     return val;
     998             : }
     999             : 
    1000             : Datum
    1001          14 : show_trgm(PG_FUNCTION_ARGS)
    1002             : {
    1003          14 :     text       *in = PG_GETARG_TEXT_PP(0);
    1004             :     TRGM       *trg;
    1005             :     Datum      *d;
    1006             :     ArrayType  *a;
    1007             :     trgm       *ptr;
    1008             :     int         i;
    1009             : 
    1010          14 :     trg = generate_trgm(VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
    1011          14 :     d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg)));
    1012             : 
    1013          88 :     for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++)
    1014             :     {
    1015          74 :         text       *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3));
    1016             : 
    1017          74 :         if (pg_database_encoding_max_length() > 1 && !ISPRINTABLETRGM(ptr))
    1018             :         {
    1019           0 :             snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr));
    1020           0 :             SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item)));
    1021             :         }
    1022             :         else
    1023             :         {
    1024          74 :             SET_VARSIZE(item, VARHDRSZ + 3);
    1025          74 :             CPTRGM(VARDATA(item), ptr);
    1026             :         }
    1027          74 :         d[i] = PointerGetDatum(item);
    1028             :     }
    1029             : 
    1030          14 :     a = construct_array_builtin(d, ARRNELEM(trg), TEXTOID);
    1031             : 
    1032          88 :     for (i = 0; i < ARRNELEM(trg); i++)
    1033          74 :         pfree(DatumGetPointer(d[i]));
    1034             : 
    1035          14 :     pfree(d);
    1036          14 :     pfree(trg);
    1037          14 :     PG_FREE_IF_COPY(in, 0);
    1038             : 
    1039          14 :     PG_RETURN_POINTER(a);
    1040             : }
    1041             : 
    1042             : float4
    1043      137438 : cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
    1044             : {
    1045             :     trgm       *ptr1,
    1046             :                *ptr2;
    1047      137438 :     int         count = 0;
    1048             :     int         len1,
    1049             :                 len2;
    1050             : 
    1051      137438 :     ptr1 = GETARR(trg1);
    1052      137438 :     ptr2 = GETARR(trg2);
    1053             : 
    1054      137438 :     len1 = ARRNELEM(trg1);
    1055      137438 :     len2 = ARRNELEM(trg2);
    1056             : 
    1057             :     /* explicit test is needed to avoid 0/0 division when both lengths are 0 */
    1058      137438 :     if (len1 <= 0 || len2 <= 0)
    1059           2 :         return (float4) 0.0;
    1060             : 
    1061     1748726 :     while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
    1062             :     {
    1063     1611290 :         int         res = CMPTRGM(ptr1, ptr2);
    1064             : 
    1065     1611290 :         if (res < 0)
    1066      363678 :             ptr1++;
    1067     1247612 :         else if (res > 0)
    1068      425414 :             ptr2++;
    1069             :         else
    1070             :         {
    1071      822198 :             ptr1++;
    1072      822198 :             ptr2++;
    1073      822198 :             count++;
    1074             :         }
    1075             :     }
    1076             : 
    1077             :     /*
    1078             :      * If inexact then len2 is equal to count, because we don't know actual
    1079             :      * length of second string in inexact search and we can assume that count
    1080             :      * is a lower bound of len2.
    1081             :      */
    1082      137436 :     return CALCSML(count, len1, inexact ? count : len2);
    1083             : }
    1084             : 
    1085             : 
    1086             : /*
    1087             :  * Returns whether trg2 contains all trigrams in trg1.
    1088             :  * This relies on the trigram arrays being sorted.
    1089             :  */
    1090             : bool
    1091         380 : trgm_contained_by(TRGM *trg1, TRGM *trg2)
    1092             : {
    1093             :     trgm       *ptr1,
    1094             :                *ptr2;
    1095             :     int         len1,
    1096             :                 len2;
    1097             : 
    1098         380 :     ptr1 = GETARR(trg1);
    1099         380 :     ptr2 = GETARR(trg2);
    1100             : 
    1101         380 :     len1 = ARRNELEM(trg1);
    1102         380 :     len2 = ARRNELEM(trg2);
    1103             : 
    1104        1244 :     while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
    1105             :     {
    1106        1198 :         int         res = CMPTRGM(ptr1, ptr2);
    1107             : 
    1108        1198 :         if (res < 0)
    1109         334 :             return false;
    1110         864 :         else if (res > 0)
    1111         640 :             ptr2++;
    1112             :         else
    1113             :         {
    1114         224 :             ptr1++;
    1115         224 :             ptr2++;
    1116             :         }
    1117             :     }
    1118          46 :     if (ptr1 - GETARR(trg1) < len1)
    1119           8 :         return false;
    1120             :     else
    1121          38 :         return true;
    1122             : }
    1123             : 
    1124             : /*
    1125             :  * Return a palloc'd boolean array showing, for each trigram in "query",
    1126             :  * whether it is present in the trigram array "key".
    1127             :  * This relies on the "key" array being sorted, but "query" need not be.
    1128             :  */
    1129             : bool *
    1130        4300 : trgm_presence_map(TRGM *query, TRGM *key)
    1131             : {
    1132             :     bool       *result;
    1133        4300 :     trgm       *ptrq = GETARR(query),
    1134        4300 :                *ptrk = GETARR(key);
    1135        4300 :     int         lenq = ARRNELEM(query),
    1136        4300 :                 lenk = ARRNELEM(key),
    1137             :                 i;
    1138             : 
    1139        4300 :     result = (bool *) palloc0(lenq * sizeof(bool));
    1140             : 
    1141             :     /* for each query trigram, do a binary search in the key array */
    1142     1015120 :     for (i = 0; i < lenq; i++)
    1143             :     {
    1144     1010820 :         int         lo = 0;
    1145     1010820 :         int         hi = lenk;
    1146             : 
    1147     4747306 :         while (lo < hi)
    1148             :         {
    1149     3752564 :             int         mid = (lo + hi) / 2;
    1150     3752564 :             int         res = CMPTRGM(ptrq, ptrk + mid);
    1151             : 
    1152     3752564 :             if (res < 0)
    1153     1568164 :                 hi = mid;
    1154     2184400 :             else if (res > 0)
    1155     2168322 :                 lo = mid + 1;
    1156             :             else
    1157             :             {
    1158       16078 :                 result[i] = true;
    1159       16078 :                 break;
    1160             :             }
    1161             :         }
    1162     1010820 :         ptrq++;
    1163             :     }
    1164             : 
    1165        4300 :     return result;
    1166             : }
    1167             : 
    1168             : Datum
    1169       62904 : similarity(PG_FUNCTION_ARGS)
    1170             : {
    1171       62904 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1172       62904 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1173             :     TRGM       *trg1,
    1174             :                *trg2;
    1175             :     float4      res;
    1176             : 
    1177       62904 :     trg1 = generate_trgm(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1));
    1178       62904 :     trg2 = generate_trgm(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2));
    1179             : 
    1180       62904 :     res = cnt_sml(trg1, trg2, false);
    1181             : 
    1182       62904 :     pfree(trg1);
    1183       62904 :     pfree(trg2);
    1184       62904 :     PG_FREE_IF_COPY(in1, 0);
    1185       62904 :     PG_FREE_IF_COPY(in2, 1);
    1186             : 
    1187       62904 :     PG_RETURN_FLOAT4(res);
    1188             : }
    1189             : 
    1190             : Datum
    1191        1804 : word_similarity(PG_FUNCTION_ARGS)
    1192             : {
    1193        1804 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1194        1804 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1195             :     float4      res;
    1196             : 
    1197        1804 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1198        1804 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1199             :                                0);
    1200             : 
    1201        1804 :     PG_FREE_IF_COPY(in1, 0);
    1202        1804 :     PG_FREE_IF_COPY(in2, 1);
    1203        1804 :     PG_RETURN_FLOAT4(res);
    1204             : }
    1205             : 
    1206             : Datum
    1207        1764 : strict_word_similarity(PG_FUNCTION_ARGS)
    1208             : {
    1209        1764 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1210        1764 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1211             :     float4      res;
    1212             : 
    1213        1764 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1214        1764 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1215             :                                WORD_SIMILARITY_STRICT);
    1216             : 
    1217        1764 :     PG_FREE_IF_COPY(in1, 0);
    1218        1764 :     PG_FREE_IF_COPY(in2, 1);
    1219        1764 :     PG_RETURN_FLOAT4(res);
    1220             : }
    1221             : 
    1222             : Datum
    1223        2008 : similarity_dist(PG_FUNCTION_ARGS)
    1224             : {
    1225        2008 :     float4      res = DatumGetFloat4(DirectFunctionCall2(similarity,
    1226             :                                                          PG_GETARG_DATUM(0),
    1227             :                                                          PG_GETARG_DATUM(1)));
    1228             : 
    1229        2008 :     PG_RETURN_FLOAT4(1.0 - res);
    1230             : }
    1231             : 
    1232             : Datum
    1233       12000 : similarity_op(PG_FUNCTION_ARGS)
    1234             : {
    1235       12000 :     float4      res = DatumGetFloat4(DirectFunctionCall2(similarity,
    1236             :                                                          PG_GETARG_DATUM(0),
    1237             :                                                          PG_GETARG_DATUM(1)));
    1238             : 
    1239       12000 :     PG_RETURN_BOOL(res >= similarity_threshold);
    1240             : }
    1241             : 
    1242             : Datum
    1243        3848 : word_similarity_op(PG_FUNCTION_ARGS)
    1244             : {
    1245        3848 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1246        3848 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1247             :     float4      res;
    1248             : 
    1249        3848 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1250        3848 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1251             :                                WORD_SIMILARITY_CHECK_ONLY);
    1252             : 
    1253        3848 :     PG_FREE_IF_COPY(in1, 0);
    1254        3848 :     PG_FREE_IF_COPY(in2, 1);
    1255        3848 :     PG_RETURN_BOOL(res >= word_similarity_threshold);
    1256             : }
    1257             : 
    1258             : Datum
    1259        3848 : word_similarity_commutator_op(PG_FUNCTION_ARGS)
    1260             : {
    1261        3848 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1262        3848 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1263             :     float4      res;
    1264             : 
    1265        3848 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1266        3848 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1267             :                                WORD_SIMILARITY_CHECK_ONLY);
    1268             : 
    1269        3848 :     PG_FREE_IF_COPY(in1, 0);
    1270        3848 :     PG_FREE_IF_COPY(in2, 1);
    1271        3848 :     PG_RETURN_BOOL(res >= word_similarity_threshold);
    1272             : }
    1273             : 
    1274             : Datum
    1275           0 : word_similarity_dist_op(PG_FUNCTION_ARGS)
    1276             : {
    1277           0 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1278           0 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1279             :     float4      res;
    1280             : 
    1281           0 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1282           0 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1283             :                                0);
    1284             : 
    1285           0 :     PG_FREE_IF_COPY(in1, 0);
    1286           0 :     PG_FREE_IF_COPY(in2, 1);
    1287           0 :     PG_RETURN_FLOAT4(1.0 - res);
    1288             : }
    1289             : 
    1290             : Datum
    1291        1428 : word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
    1292             : {
    1293        1428 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1294        1428 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1295             :     float4      res;
    1296             : 
    1297        1428 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1298        1428 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1299             :                                0);
    1300             : 
    1301        1428 :     PG_FREE_IF_COPY(in1, 0);
    1302        1428 :     PG_FREE_IF_COPY(in2, 1);
    1303        1428 :     PG_RETURN_FLOAT4(1.0 - res);
    1304             : }
    1305             : 
    1306             : Datum
    1307        5060 : strict_word_similarity_op(PG_FUNCTION_ARGS)
    1308             : {
    1309        5060 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1310        5060 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1311             :     float4      res;
    1312             : 
    1313        5060 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1314        5060 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1315             :                                WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);
    1316             : 
    1317        5060 :     PG_FREE_IF_COPY(in1, 0);
    1318        5060 :     PG_FREE_IF_COPY(in2, 1);
    1319        5060 :     PG_RETURN_BOOL(res >= strict_word_similarity_threshold);
    1320             : }
    1321             : 
    1322             : Datum
    1323        5060 : strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
    1324             : {
    1325        5060 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1326        5060 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1327             :     float4      res;
    1328             : 
    1329        5060 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1330        5060 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1331             :                                WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);
    1332             : 
    1333        5060 :     PG_FREE_IF_COPY(in1, 0);
    1334        5060 :     PG_FREE_IF_COPY(in2, 1);
    1335        5060 :     PG_RETURN_BOOL(res >= strict_word_similarity_threshold);
    1336             : }
    1337             : 
    1338             : Datum
    1339           0 : strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
    1340             : {
    1341           0 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1342           0 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1343             :     float4      res;
    1344             : 
    1345           0 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1346           0 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1347             :                                WORD_SIMILARITY_STRICT);
    1348             : 
    1349           0 :     PG_FREE_IF_COPY(in1, 0);
    1350           0 :     PG_FREE_IF_COPY(in2, 1);
    1351           0 :     PG_RETURN_FLOAT4(1.0 - res);
    1352             : }
    1353             : 
    1354             : Datum
    1355        1440 : strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
    1356             : {
    1357        1440 :     text       *in1 = PG_GETARG_TEXT_PP(0);
    1358        1440 :     text       *in2 = PG_GETARG_TEXT_PP(1);
    1359             :     float4      res;
    1360             : 
    1361        1440 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1362        1440 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1363             :                                WORD_SIMILARITY_STRICT);
    1364             : 
    1365        1440 :     PG_FREE_IF_COPY(in1, 0);
    1366        1440 :     PG_FREE_IF_COPY(in2, 1);
    1367        1440 :     PG_RETURN_FLOAT4(1.0 - res);
    1368             : }

Generated by: LCOV version 1.16